This is the formal analysis file for the “The Nature of Reasonableness” by by Kevin Tobia, Ivar R. Hannikainen, David Kamper, Guilherme Almeida, Piotr Bystranowski, Niek Strohmaier, Vilius Dranseika, Markus Kneer, Fernando Aguiar, Kristina Dolinina, Bartosz Janik, Eglė Lauraitytė, Alice Liefgreen, Maciej Próchnicki, Alejandro Rosas, Vivek Kumar Shukla, and Noel Struchiner (2025, Stanford Journal of International Law).
# Load Libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
library(ggplot2)
library(ggthemes)
library(corrplot)
## corrplot 0.95 loaded
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
DATA_PATH <- "/Users/dgkamper/Library/CloudStorage/GoogleDrive-dgkamper@gmail.com/My Drive/DGK Lab/Collaborations/Language and Law Laboratory/Analysis/LegalResonablenessAnalysis/Data_all_numeric.csv"
# Read the data from specified path
Data_All <- read.csv(DATA_PATH, stringsAsFactors = FALSE)
# Verify data structure
glimpse(Data_All)
## Rows: 2,426
## Columns: 42
## $ q1 <dbl> 2, 2, 3, 2, 2, 3, 2, 3, 2, 2, 3, 3, 4, 1, 4, 3, 3, 2, 2…
## $ q2 <dbl> 0, 1, 3, 1, 2, 15, 5, 2, 7, 2, 5, 3, 5, 7, 20, 5, 4, 2,…
## $ q3 <dbl> 8, 7, 10, 10, 3, 14, 2, 5, 8, 7, 3, 3, 5, 5, 4, 2, 7, 5…
## $ q4 <dbl> 2500, 2000, 2000, 50, 2000, 1200, 2, 1800, 2000, 1500, …
## $ q5 <dbl> 120, 30, 50, 50, 60, 30, 3, 30, 100, 60, 30, 30, 40, 30…
## $ q6 <dbl> 0, 1, 1, 0, 2, 0, 5, 0, 10, 5, 0, 7, 4, 4, 10, 10, 7, 5…
## $ q7 <chr> "20", "2", "30", "2", "15", "0", "5", "10", "15", "10",…
## $ q8 <dbl> 100, 10, 1, 20, 12, 5, 5, 4, 12, 12, 3, 4, 2, 12, 3, 10…
## $ q9 <dbl> 0, 2, 10, 1, 4, 3, 10, 3, 6, 8, 5, 10, 2, 6, 25, 7, 10,…
## $ q10 <dbl> 3, 50, 5, 5, 3, 1, 4, 1, 8, 0, 0, 10, 1, 1, 2, 0, 1, 1,…
## $ q11 <chr> "0", "1000", "50", "5", "250", "0", "7", "0", "400", "0…
## $ q12 <dbl> 0, 10, 10, 10, 5, 0, 6, 1, 20, 75, 5, 30, 30, 1, 1, 5, …
## $ q13 <dbl> 25, 1, 20, 15, 10, 10, 2, 6, 15, 100, 16, 50, 50, 4, 25…
## $ q14 <chr> "30", "5", "15", "3", "5", "10", "8", "5", "10", "5", "…
## $ q15 <dbl> 25, 30, 5, 5, 7, 25, 6, 4, 25, 4, 12, 4, 5, 4, 2, 6, 8,…
## $ q16 <dbl> 5, 30, 2, 3, 30, 25, 6, 4, 4, 5, 4, 4, 4, 4, 2, 2, 30, …
## $ q17 <chr> "3", "1", "1", "1", "1", "0", "1", "0", "2", "1", "0", …
## $ q18 <dbl> 12, 1, 5, 15, 10, 0, 1, 1, 5, 0, 0, 2, 10, 5, 15, 5, 5,…
## $ q19 <int> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ q20 <int> 0, 75, 5, 1, 10, 0, 4, 2, 30, 0, 0, 40, 15, 1, 10, 0, 8…
## $ q21 <dbl> 0, 12, 6, 2, 10, 6, 4, 4, 6, 5, 6, 9, 6, 5, 10, 7, 6, 8…
## $ q22 <dbl> 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4…
## $ q23 <dbl> 1, 0, 2, 3, 3, 10, 6, 1, 8, 5, 1, 2, 4, 2, 1, 4, 8, 2, …
## $ q24 <dbl> 5, 2, 5, 3, 4, 48, 4, 24, 72, 24, 6, 10, 24, 10, 2, 6, …
## $ q25 <dbl> 0, 10, 1000, 5, 300, 0, 5, 1000, 20000, 1000, 500, 2000…
## $ q26 <dbl> 3, 2, 2, 2, 2, 1, 4, 2, 4, 2, 2, 4, 6, 3, 1, 2, 3, 8, 2…
## $ q27 <dbl> 0, 6, 2, 5, 6, 3, 50, 5, 25, 30, 0, 6, 5, 5, 5, 12, 6, …
## $ q28 <dbl> 5, 5, 10, 50, 20, 5, 5, 10, 25, 60, 100, 80, 5, 20, 10,…
## $ q29 <dbl> 100, 50, 2, 25, 100, 100, 5, 50, 100, 100, 100, 100, 15…
## $ q30 <dbl> 3, 4, 2, 1, 6, 1, 2, 12, 26, 5, 0, 1, 8, 2, 4, 3, 4, 2,…
## $ q31 <dbl> 20, 750, 2, 12, 25, 0, 3, 100, 10, 30, 250, 50, 50, 10,…
## $ q32 <dbl> 5, 1, 2, 48, 24, 24, 55, 24, 24, 24, 24, 48, 24, 48, 12…
## $ q33 <chr> "0", "10", "2", "25", "4", "8", "2", "4", "4", "3.2", "…
## $ q34 <dbl> 95, 1, 2, 15, 50, 0, 5, 0, 95, 85, 100, 99, 50, 30, 15,…
## $ age <int> 18, 47, 28, 30, NA, NA, 30, 50, 47, 33, 0, 0, 30, 43, 4…
## $ Gender <chr> "Male", "Male", "Male", "Male", "Female", "Female", "Ma…
## $ ID <chr> "usa1", "usa2", "usa3", "usa4", "usa5", "usa6", "usa7",…
## $ Country_name <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA",…
## $ Condition_name <chr> "Reasonable", "Reasonable", "Reasonable", "Reasonable",…
## $ ID.1 <chr> "usa1", "usa2", "usa3", "usa4", "usa5", "usa6", "usa7",…
## $ Country <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Condition <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
# Split data by condition
Data_All_Average <- Data_All %>% filter(Condition_name == "Average")
Data_All_Ideal <- Data_All %>% filter(Condition_name == "Ideal")
Data_All_Reasonable <- Data_All %>% filter(Condition_name == "Reasonable")
# Exclude participants whose answer to Q19 does not equal 15 [comprehension/attention question].
Data_All_Average_Pass1 <- Data_All_Average %>% filter(q19 == 15)
Data_All_Ideal_Pass1 <- Data_All_Ideal %>% filter(q19 == 15)
Data_All_Reasonable_Pass1 <- Data_All_Reasonable %>% filter(q19 == 15)
# Print participant counts
condition_counts <- data.frame(
Condition = c("Average", "Ideal", "Reasonable"),
Original_Count = c(nrow(Data_All_Average), nrow(Data_All_Ideal), nrow(Data_All_Reasonable)),
After_Attention_Check = c(nrow(Data_All_Average_Pass1), nrow(Data_All_Ideal_Pass1), nrow(Data_All_Reasonable_Pass1)),
Excluded_Count = c(nrow(Data_All_Average) - nrow(Data_All_Average_Pass1),
nrow(Data_All_Ideal) - nrow(Data_All_Ideal_Pass1),
nrow(Data_All_Reasonable) - nrow(Data_All_Reasonable_Pass1)),
Exclusion_Rate = c((nrow(Data_All_Average) - nrow(Data_All_Average_Pass1))/nrow(Data_All_Average),
(nrow(Data_All_Ideal) - nrow(Data_All_Ideal_Pass1))/nrow(Data_All_Ideal),
(nrow(Data_All_Reasonable) - nrow(Data_All_Reasonable_Pass1))/nrow(Data_All_Reasonable))
)
# Format the exclusion rate as percentage
condition_counts$Exclusion_Rate <- scales::percent(condition_counts$Exclusion_Rate)
# Display the participant counts table
kable(condition_counts, caption = "Participant Counts Before and After Attention Check") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Condition | Original_Count | After_Attention_Check | Excluded_Count | Exclusion_Rate |
|---|---|---|---|---|
| Average | 815 | 793 | 22 | 2.70% |
| Ideal | 825 | 800 | 25 | 3.03% |
| Reasonable | 786 | 758 | 28 | 3.56% |
# Define the question columns - ensuring they actually exist in the data
question_cols <- c("q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10",
"q11", "q12", "q13", "q14", "q15", "q16", "q17", "q18",
"q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27",
"q28", "q29", "q30", "q31", "q32", "q33", "q34")
# Check if all question columns exist in the data
existing_cols <- question_cols[question_cols %in% colnames(Data_All_Average_Pass1)]
missing_cols <- question_cols[!question_cols %in% colnames(Data_All_Average_Pass1)]
print("Existing question columns:")
## [1] "Existing question columns:"
print(existing_cols)
## [1] "q1" "q2" "q3" "q4" "q5" "q6" "q7" "q8" "q9" "q10" "q11" "q12"
## [13] "q13" "q14" "q15" "q16" "q17" "q18" "q20" "q21" "q22" "q23" "q24" "q25"
## [25] "q26" "q27" "q28" "q29" "q30" "q31" "q32" "q33" "q34"
print("Missing question columns:")
## [1] "Missing question columns:"
print(missing_cols)
## character(0)
# Continue with only the existing columns
question_cols <- existing_cols
# Convert columns to numeric
Data_All_Average_Pass1 <- Data_All_Average_Pass1 %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
Data_All_Ideal_Pass1 <- Data_All_Ideal_Pass1 %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
Data_All_Reasonable_Pass1 <- Data_All_Reasonable_Pass1 %>%
mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
# Verify the number of participants in each condition
cat("Ideal condition:", nrow(Data_All_Ideal_Pass1), "participants\n")
## Ideal condition: 800 participants
cat("Average condition:", nrow(Data_All_Average_Pass1), "participants\n")
## Average condition: 793 participants
cat("Reasonable condition:", nrow(Data_All_Reasonable_Pass1), "participants\n")
## Reasonable condition: 758 participants
No parts to be completed here: done in Analysis 3.
# Function to filter data based on 3 SD criterion
filter_data <- function(data, cols) {
# Pre-allocate a list to track excluded participants for each question
exclusions_by_question <- list()
# Original count
original_count <- nrow(data)
result <- data
for (col in cols) {
if (col %in% colnames(data)) {
# Calculate mean and standard deviation
avg <- mean(result[[col]], na.rm = TRUE)
s <- sd(result[[col]], na.rm = TRUE)
# Identify outliers
outlier_indices <- which(!is.na(result[[col]]) & abs(result[[col]] - avg) > 3 * s)
exclusions_by_question[[col]] <- length(outlier_indices)
# Filter out outliers
result <- result %>%
filter(is.na(!!sym(col)) | abs(!!sym(col) - avg) <= 3 * s)
}
}
# Final count
final_count <- nrow(result)
return(list(
filtered_data = result,
original_count = original_count,
final_count = final_count,
excluded_count = original_count - final_count,
exclusion_rate = (original_count - final_count) / original_count,
exclusions_by_question = exclusions_by_question
))
}
# Apply filtering and track exclusion counts
avg_filter_results <- filter_data(Data_All_Average_Pass1, question_cols)
ideal_filter_results <- filter_data(Data_All_Ideal_Pass1, question_cols)
reasonable_filter_results <- filter_data(Data_All_Reasonable_Pass1, question_cols)
# Extract the filtered data
Data_All_Average_Filtered <- avg_filter_results$filtered_data
Data_All_Ideal_Filtered <- ideal_filter_results$filtered_data
Data_All_Reasonable_Filtered <- reasonable_filter_results$filtered_data
# Display outlier exclusion summary
outlier_summary <- data.frame(
Condition = c("Average", "Ideal", "Reasonable"),
Original_Count = c(avg_filter_results$original_count,
ideal_filter_results$original_count,
reasonable_filter_results$original_count),
After_Outlier_Removal = c(avg_filter_results$final_count,
ideal_filter_results$final_count,
reasonable_filter_results$final_count),
Excluded_Count = c(avg_filter_results$excluded_count,
ideal_filter_results$excluded_count,
reasonable_filter_results$excluded_count),
Exclusion_Rate = c(avg_filter_results$exclusion_rate,
ideal_filter_results$exclusion_rate,
reasonable_filter_results$exclusion_rate)
)
# Format the exclusion rate as percentage
outlier_summary$Exclusion_Rate <- scales::percent(outlier_summary$Exclusion_Rate)
# Display the outlier exclusion summary table
kable(outlier_summary, caption = "Participant Counts Before and After Outlier Removal") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Condition | Original_Count | After_Outlier_Removal | Excluded_Count | Exclusion_Rate |
|---|---|---|---|---|
| Average | 793 | 609 | 184 | 23.20% |
| Ideal | 800 | 608 | 192 | 24.00% |
| Reasonable | 758 | 590 | 168 | 22.16% |
# Define helper function for coefficients
get_model_coefs <- function(model) {
summary_model <- summary(model)
coef_table <- summary_model$coefficients
data.frame(
Term = rownames(coef_table),
Estimate = coef_table[, "Estimate"],
Std_Error = coef_table[, "Std. Error"],
t_value = coef_table[, "t value"],
p_value = coef_table[, "Pr(>|t|)"]
)
}
# Define helper function for p-values
format_pvalue <- function(p_value) {
if (is.na(p_value)) {
return("NA")
} else if (p_value < 0.001) {
return("< .001")
} else {
# Format to 3 decimal places, removing leading zero if present
formatted_p <- sprintf("%.3f", p_value)
# Remove leading zero for values less than 1 (e.g., "0.123" -> ".123")
formatted_p <- sub("^0\\.", ".", formatted_p)
# Handle case where p-value rounds to 1.000
if (formatted_p == "1.000" && p_value < 1) {
return(".999") # Or indicate somehow it's slightly less than 1
} else if (formatted_p == "1.000" && p_value >= 1) {
return("1.000") # Or handle as appropriate if p can be >= 1
}
return(formatted_p)
}
}
# Calculate medians from attention-check filtered data ONLY
column_medians_average_no3sd <- sapply(Data_All_Average_Pass1[, question_cols], median, na.rm = TRUE)
column_medians_ideal_no3sd <- sapply(Data_All_Ideal_Pass1[, question_cols], median, na.rm = TRUE)
column_medians_reasonable_no3sd <- sapply(Data_All_Reasonable_Pass1[, question_cols], median, na.rm = TRUE)
# Create a dataframe for the medians (NO 3SD)
medians_df_no3sd <- data.frame(
Question = names(column_medians_average_no3sd),
Average = column_medians_average_no3sd,
Ideal = column_medians_ideal_no3sd,
Reasonable = column_medians_reasonable_no3sd
)
# Display the medians for each question
kable(medians_df_no3sd, caption = "Median Values (NO 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 3.0 | 2.0 | 2.0 |
| q2 | q2 | 7.0 | 2.0 | 4.0 |
| q3 | q3 | 3.0 | 6.0 | 5.0 |
| q4 | q4 | 2000.0 | 2000.0 | 2000.0 |
| q5 | q5 | 20.0 | 30.0 | 30.0 |
| q6 | q6 | 10.0 | 1.0 | 3.0 |
| q7 | q7 | 15.0 | 5.0 | 10.0 |
| q8 | q8 | 5.0 | 12.0 | 10.0 |
| q9 | q9 | 5.0 | 3.0 | 5.0 |
| q10 | q10 | 5.0 | 0.0 | 2.0 |
| q11 | q11 | 500.0 | 0.0 | 10.0 |
| q12 | q12 | 30.0 | 1.0 | 10.0 |
| q13 | q13 | 40.0 | 10.0 | 15.0 |
| q14 | q14 | 10.0 | 2.0 | 5.0 |
| q15 | q15 | 9.0 | 10.0 | 8.0 |
| q16 | q16 | 5.0 | 5.0 | 4.0 |
| q17 | q17 | 3.0 | 0.0 | 1.0 |
| q18 | q18 | 15.0 | 2.0 | 9.5 |
| q20 | q20 | 20.0 | 0.0 | 4.0 |
| q21 | q21 | 8.0 | 5.0 | 5.0 |
| q22 | q22 | 10.0 | 7.0 | 7.0 |
| q23 | q23 | 2.0 | 3.0 | 3.0 |
| q24 | q24 | 16.0 | 24.0 | 24.0 |
| q25 | q25 | 2000.0 | 500.0 | 1000.0 |
| q26 | q26 | 8.0 | 2.0 | 4.0 |
| q27 | q27 | 12.0 | 5.0 | 6.0 |
| q28 | q28 | 10.0 | 20.0 | 20.0 |
| q29 | q29 | 55.0 | 90.0 | 80.0 |
| q30 | q30 | 12.0 | 4.0 | 4.0 |
| q31 | q31 | 50.0 | 13.0 | 25.0 |
| q32 | q32 | 24.0 | 48.0 | 40.0 |
| q33 | q33 | 5.5 | 2.5 | 4.0 |
| q34 | q34 | 50.0 | 10.0 | 50.0 |
# Define Log function: Small constant for zero
log_smallvalue <- function(x) {
x_safe <- x
# Replace only actual zeros, leave NAs as NA
x_safe[which(x == 0 & !is.na(x))] <- 0.01
log(x_safe)
}
# Apply the log function to NO 3SD medians
log_medians_average_no3sd_smallval <- log_smallvalue(column_medians_average_no3sd)
log_medians_ideal_no3sd_smallval <- log_smallvalue(column_medians_ideal_no3sd)
log_medians_reasonable_no3sd_smallval <- log_smallvalue(column_medians_reasonable_no3sd)
# Create a dataframe for the log medians (NO 3SD, Small Constant)
log_medians_df_no3sd_smallval <- data.frame(
Question = names(log_medians_average_no3sd_smallval),
Average = log_medians_average_no3sd_smallval,
Ideal = log_medians_ideal_no3sd_smallval,
Reasonable = log_medians_reasonable_no3sd_smallval
)
# Display the log medians
kable(log_medians_df_no3sd_smallval, caption = "Log-Transformed (Small Constant) Median Values (NO 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 1.0986123 | 0.6931472 | 0.6931472 |
| q2 | q2 | 1.9459101 | 0.6931472 | 1.3862944 |
| q3 | q3 | 1.0986123 | 1.7917595 | 1.6094379 |
| q4 | q4 | 7.6009025 | 7.6009025 | 7.6009025 |
| q5 | q5 | 2.9957323 | 3.4011974 | 3.4011974 |
| q6 | q6 | 2.3025851 | 0.0000000 | 1.0986123 |
| q7 | q7 | 2.7080502 | 1.6094379 | 2.3025851 |
| q8 | q8 | 1.6094379 | 2.4849066 | 2.3025851 |
| q9 | q9 | 1.6094379 | 1.0986123 | 1.6094379 |
| q10 | q10 | 1.6094379 | -4.6051702 | 0.6931472 |
| q11 | q11 | 6.2146081 | -4.6051702 | 2.3025851 |
| q12 | q12 | 3.4011974 | 0.0000000 | 2.3025851 |
| q13 | q13 | 3.6888795 | 2.3025851 | 2.7080502 |
| q14 | q14 | 2.3025851 | 0.6931472 | 1.6094379 |
| q15 | q15 | 2.1972246 | 2.3025851 | 2.0794415 |
| q16 | q16 | 1.6094379 | 1.6094379 | 1.3862944 |
| q17 | q17 | 1.0986123 | -4.6051702 | 0.0000000 |
| q18 | q18 | 2.7080502 | 0.6931472 | 2.2512918 |
| q20 | q20 | 2.9957323 | -4.6051702 | 1.3862944 |
| q21 | q21 | 2.0794415 | 1.6094379 | 1.6094379 |
| q22 | q22 | 2.3025851 | 1.9459101 | 1.9459101 |
| q23 | q23 | 0.6931472 | 1.0986123 | 1.0986123 |
| q24 | q24 | 2.7725887 | 3.1780538 | 3.1780538 |
| q25 | q25 | 7.6009025 | 6.2146081 | 6.9077553 |
| q26 | q26 | 2.0794415 | 0.6931472 | 1.3862944 |
| q27 | q27 | 2.4849066 | 1.6094379 | 1.7917595 |
| q28 | q28 | 2.3025851 | 2.9957323 | 2.9957323 |
| q29 | q29 | 4.0073332 | 4.4998097 | 4.3820266 |
| q30 | q30 | 2.4849066 | 1.3862944 | 1.3862944 |
| q31 | q31 | 3.9120230 | 2.5649494 | 3.2188758 |
| q32 | q32 | 3.1780538 | 3.8712010 | 3.6888795 |
| q33 | q33 | 1.7047481 | 0.9162907 | 1.3862944 |
| q34 | q34 | 3.9120230 | 2.3025851 | 3.9120230 |
# Define Log function: Add 1
log_plus_one <- function(x) {
log(x + 1)
}
# Apply the log function to NO 3SD medians
log_medians_average_no3sd_plusone <- log_plus_one(column_medians_average_no3sd)
log_medians_ideal_no3sd_plusone <- log_plus_one(column_medians_ideal_no3sd)
log_medians_reasonable_no3sd_plusone <- log_plus_one(column_medians_reasonable_no3sd)
# Create a dataframe for the log medians (NO 3SD, Plus One)
log_medians_df_no3sd_plusone <- data.frame(
Question = names(log_medians_average_no3sd_plusone),
Average = log_medians_average_no3sd_plusone,
Ideal = log_medians_ideal_no3sd_plusone,
Reasonable = log_medians_reasonable_no3sd_plusone
)
# Display the log medians
kable(log_medians_df_no3sd_plusone, caption = "Log-Transformed (Plus One) Median Values (NO 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 1.386294 | 1.0986123 | 1.0986123 |
| q2 | q2 | 2.079442 | 1.0986123 | 1.6094379 |
| q3 | q3 | 1.386294 | 1.9459101 | 1.7917595 |
| q4 | q4 | 7.601402 | 7.6014023 | 7.6014023 |
| q5 | q5 | 3.044522 | 3.4339872 | 3.4339872 |
| q6 | q6 | 2.397895 | 0.6931472 | 1.3862944 |
| q7 | q7 | 2.772589 | 1.7917595 | 2.3978953 |
| q8 | q8 | 1.791759 | 2.5649494 | 2.3978953 |
| q9 | q9 | 1.791759 | 1.3862944 | 1.7917595 |
| q10 | q10 | 1.791759 | 0.0000000 | 1.0986123 |
| q11 | q11 | 6.216606 | 0.0000000 | 2.3978953 |
| q12 | q12 | 3.433987 | 0.6931472 | 2.3978953 |
| q13 | q13 | 3.713572 | 2.3978953 | 2.7725887 |
| q14 | q14 | 2.397895 | 1.0986123 | 1.7917595 |
| q15 | q15 | 2.302585 | 2.3978953 | 2.1972246 |
| q16 | q16 | 1.791759 | 1.7917595 | 1.6094379 |
| q17 | q17 | 1.386294 | 0.0000000 | 0.6931472 |
| q18 | q18 | 2.772589 | 1.0986123 | 2.3513753 |
| q20 | q20 | 3.044522 | 0.0000000 | 1.6094379 |
| q21 | q21 | 2.197225 | 1.7917595 | 1.7917595 |
| q22 | q22 | 2.397895 | 2.0794415 | 2.0794415 |
| q23 | q23 | 1.098612 | 1.3862944 | 1.3862944 |
| q24 | q24 | 2.833213 | 3.2188758 | 3.2188758 |
| q25 | q25 | 7.601402 | 6.2166061 | 6.9087548 |
| q26 | q26 | 2.197225 | 1.0986123 | 1.6094379 |
| q27 | q27 | 2.564949 | 1.7917595 | 1.9459101 |
| q28 | q28 | 2.397895 | 3.0445224 | 3.0445224 |
| q29 | q29 | 4.025352 | 4.5108595 | 4.3944492 |
| q30 | q30 | 2.564949 | 1.6094379 | 1.6094379 |
| q31 | q31 | 3.931826 | 2.6390573 | 3.2580965 |
| q32 | q32 | 3.218876 | 3.8918203 | 3.7135721 |
| q33 | q33 | 1.871802 | 1.2527630 | 1.6094379 |
| q34 | q34 | 3.931826 | 2.3978953 | 3.9318256 |
# Create data frame for regression (NO 3SD, Small Constant)
dataforAICMedians_no3sd_smallval <- data.frame(
Reasonable = log_medians_reasonable_no3sd_smallval,
Average = log_medians_average_no3sd_smallval,
Ideal = log_medians_ideal_no3sd_smallval
)
# Filter out rows with non-finite values before regression
dataforAICMedians_no3sd_smallval_clean <- dataforAICMedians_no3sd_smallval %>%
filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check if enough data points remain
if(nrow(dataforAICMedians_no3sd_smallval_clean) > 3) {
# Model I
Model1Medians_no3sd_smallval <- lm(Reasonable ~ Average, data = dataforAICMedians_no3sd_smallval_clean)
AIC1Medians_no3sd_smallval <- AIC(Model1Medians_no3sd_smallval)
r2_model1_median_no3sd_smallval <- summary(Model1Medians_no3sd_smallval)$r.squared
# Model II
Model2Medians_no3sd_smallval <- lm(Reasonable ~ Ideal, data = dataforAICMedians_no3sd_smallval_clean)
AIC2Medians_no3sd_smallval <- AIC(Model2Medians_no3sd_smallval)
r2_model2_median_no3sd_smallval <- summary(Model2Medians_no3sd_smallval)$r.squared
# Model III
Model3Medians_no3sd_smallval <- lm(Reasonable ~ Average + Ideal, data = dataforAICMedians_no3sd_smallval_clean)
AIC3Medians_no3sd_smallval <- AIC(Model3Medians_no3sd_smallval)
r2_model3_median_no3sd_smallval <- summary(Model3Medians_no3sd_smallval)$r.squared
# Create a regression summary dataframe
regression_summary_medians_no3sd_smallval <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(AIC1Medians_no3sd_smallval, AIC2Medians_no3sd_smallval, AIC3Medians_no3sd_smallval),
R_squared = c(r2_model1_median_no3sd_smallval, r2_model2_median_no3sd_smallval, r2_model3_median_no3sd_smallval)
)
# Display the regression summary table
kable(regression_summary_medians_no3sd_smallval, caption = "Regression Comparison (Median, NO 3SD, Log Small Constant)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
} else {
cat("Not enough valid data points for regression (NO 3SD, Log Small Constant).\n")
regression_summary_medians_no3sd_smallval <- data.frame(Model=character(), AIC=numeric(), R_squared=numeric())
}
| Model | AIC | R_squared |
|---|---|---|
| Average predicts Reasonable | 85.96537 | 0.7350584 |
| Ideal predicts Reasonable | 102.37505 | 0.5643791 |
| Average + Ideal predict Reasonable | 39.38395 | 0.9392149 |
# Create data frame for regression (NO 3SD, Plus One)
dataforAICMedians_no3sd_plusone <- data.frame(
Reasonable = log_medians_reasonable_no3sd_plusone,
Average = log_medians_average_no3sd_plusone,
Ideal = log_medians_ideal_no3sd_plusone
)
# Filter out rows with non-finite values before regression
dataforAICMedians_no3sd_plusone_clean <- dataforAICMedians_no3sd_plusone %>%
filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check if enough data points remain
if(nrow(dataforAICMedians_no3sd_plusone_clean) > 3) {
# Model I
Model1Medians_no3sd_plusone <- lm(Reasonable ~ Average, data = dataforAICMedians_no3sd_plusone_clean)
AIC1Medians_no3sd_plusone <- AIC(Model1Medians_no3sd_plusone)
r2_model1_median_no3sd_plusone <- summary(Model1Medians_no3sd_plusone)$r.squared
# Model II
Model2Medians_no3sd_plusone <- lm(Reasonable ~ Ideal, data = dataforAICMedians_no3sd_plusone_clean)
AIC2Medians_no3sd_plusone <- AIC(Model2Medians_no3sd_plusone)
r2_model2_median_no3sd_plusone <- summary(Model2Medians_no3sd_plusone)$r.squared
# Model III
Model3Medians_no3sd_plusone <- lm(Reasonable ~ Average + Ideal, data = dataforAICMedians_no3sd_plusone_clean)
AIC3Medians_no3sd_plusone <- AIC(Model3Medians_no3sd_plusone)
r2_model3_median_no3sd_plusone <- summary(Model3Medians_no3sd_plusone)$r.squared
# Create a regression summary dataframe
regression_summary_medians_no3sd_plusone <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(AIC1Medians_no3sd_plusone, AIC2Medians_no3sd_plusone, AIC3Medians_no3sd_plusone),
R_squared = c(r2_model1_median_no3sd_plusone, r2_model2_median_no3sd_plusone, r2_model3_median_no3sd_plusone)
)
# Display the regression summary table
kable(regression_summary_medians_no3sd_plusone, caption = "Regression Comparison (Median, NO 3SD, Log Plus One)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
} else {
cat("Not enough valid data points for regression (NO 3SD, Log Plus One).\n")
regression_summary_medians_no3sd_plusone <- data.frame(Model=character(), AIC=numeric(), R_squared=numeric())
}
| Model | AIC | R_squared |
|---|---|---|
| Average predicts Reasonable | 80.731562 | 0.7415783 |
| Ideal predicts Reasonable | 62.561052 | 0.8509963 |
| Average + Ideal predict Reasonable | 9.982732 | 0.9714949 |
# Check if models exist before plotting
if(exists("Model1Medians_no3sd_smallval") && exists("dataforAICMedians_no3sd_smallval_clean") && nrow(dataforAICMedians_no3sd_smallval_clean) > 0) {
# Average Median vs Reasonable Median
plot1_median_no3sd_smallval <- ggplot(dataforAICMedians_no3sd_smallval_clean, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Model 1 (NO 3SD, Log Small Const): Median Average vs Reasonable",
subtitle = paste("AIC =", round(AIC1Medians_no3sd_smallval, 2), ", R² =", round(r2_model1_median_no3sd_smallval, 3)),
x = "Log (Small Const) Median Average Rating", y = "Log (Small Const) Median Reasonable Rating") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Ideal Median vs Reasonable Median
plot2_median_no3sd_smallval <- ggplot(dataforAICMedians_no3sd_smallval_clean, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Model 2 (NO 3SD, Log Small Const): Median Ideal vs Reasonable",
subtitle = paste("AIC =", round(AIC2Medians_no3sd_smallval, 2), ", R² =", round(r2_model2_median_no3sd_smallval, 3)),
x = "Log (Small Const) Median Ideal Rating", y = "Log (Small Const) Median Reasonable Rating") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Average effect)
new_data_median_no3sd_smallval <- expand.grid( Average = seq(min(dataforAICMedians_no3sd_smallval_clean$Average, na.rm=TRUE), max(dataforAICMedians_no3sd_smallval_clean$Average, na.rm=TRUE), length.out = 100), Ideal = median(dataforAICMedians_no3sd_smallval_clean$Ideal, na.rm=TRUE) )
new_data_median_no3sd_smallval <- new_data_median_no3sd_smallval[is.finite(new_data_median_no3sd_smallval$Average) & is.finite(new_data_median_no3sd_smallval$Ideal),]
if(nrow(new_data_median_no3sd_smallval)>0){ new_data_median_no3sd_smallval$predicted_reasonable <- predict(Model3Medians_no3sd_smallval, newdata = new_data_median_no3sd_smallval) } else { new_data_median_no3sd_smallval <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric())}
plot3_median_no3sd_smallval <- ggplot(dataforAICMedians_no3sd_smallval_clean, aes(x = Average, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data_median_no3sd_smallval, aes(y = predicted_reasonable), color = "blue", size = 1) + labs(title = "Model 3 (NO 3SD, Log Small Const): Effect of Median Average", subtitle = paste("AIC =", round(AIC3Medians_no3sd_smallval, 2), ", R² =", round(r2_model3_median_no3sd_smallval, 3)), x = "Log (Small Const) Median Average Rating", y = "Log (Small Const) Median Reasonable Rating") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Ideal effect)
new_data2_median_no3sd_smallval <- expand.grid( Average = median(dataforAICMedians_no3sd_smallval_clean$Average, na.rm=TRUE), Ideal = seq(min(dataforAICMedians_no3sd_smallval_clean$Ideal, na.rm=TRUE), max(dataforAICMedians_no3sd_smallval_clean$Ideal, na.rm=TRUE), length.out = 100) )
new_data2_median_no3sd_smallval <- new_data2_median_no3sd_smallval[is.finite(new_data2_median_no3sd_smallval$Average) & is.finite(new_data2_median_no3sd_smallval$Ideal),]
if(nrow(new_data2_median_no3sd_smallval)>0){ new_data2_median_no3sd_smallval$predicted_reasonable <- predict(Model3Medians_no3sd_smallval, newdata = new_data2_median_no3sd_smallval) } else { new_data2_median_no3sd_smallval <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric())}
plot4_median_no3sd_smallval <- ggplot(dataforAICMedians_no3sd_smallval_clean, aes(x = Ideal, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data2_median_no3sd_smallval, aes(y = predicted_reasonable), color = "red", size = 1) + labs(title = "Model 3 (NO 3SD, Log Small Const): Effect of Median Ideal", subtitle = paste("AIC =", round(AIC3Medians_no3sd_smallval, 2), ", R² =", round(r2_model3_median_no3sd_smallval, 3)), x = "Log (Small Const) Median Ideal Rating", y = "Log (Small Const) Median Reasonable Rating") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Display plots
print(plot1_median_no3sd_smallval)
print(plot2_median_no3sd_smallval)
print(plot3_median_no3sd_smallval)
print(plot4_median_no3sd_smallval)
# Save plots
ggsave("model1_median_avg_reas_no3sd_smallval.png", plot1_median_no3sd_smallval, width = 8, height = 6, dpi = 300)
ggsave("model2_median_ideal_reas_no3sd_smallval.png", plot2_median_no3sd_smallval, width = 8, height = 6, dpi = 300)
ggsave("model3_median_avg_eff_no3sd_smallval.png", plot3_median_no3sd_smallval, width = 8, height = 6, dpi = 300)
ggsave("model3_median_ideal_eff_no3sd_smallval.png", plot4_median_no3sd_smallval, width = 8, height = 6, dpi = 300)
grid_plots_median_no3sd_smallval <- grid.arrange(plot1_median_no3sd_smallval, plot2_median_no3sd_smallval, plot3_median_no3sd_smallval, plot4_median_no3sd_smallval, ncol = 2)
ggsave("all_median_plots_no3sd_smallval.png", grid_plots_median_no3sd_smallval, width = 14, height = 10, dpi = 300)
# Correlation matrix
cor_data_no3sd_smallval <- dataforAICMedians_no3sd_smallval_clean[complete.cases(dataforAICMedians_no3sd_smallval_clean),]
if(nrow(cor_data_no3sd_smallval) > 1) {
cor_matrix_no3sd_smallval <- cor(cor_data_no3sd_smallval, use = "complete.obs")
corrplot(cor_matrix_no3sd_smallval, method = "color", type = "upper", addCoef.col = "black", tl.col = "black", tl.srt = 45, diag = FALSE, title = "Correlation Matrix (NO 3SD, Log Small Constant)", mar = c(0,0,1,0))
} else { cat("Not enough data for correlation matrix (NO 3SD, Log Small Constant).\n") }
# Coefficient tables
model1_coefs_no3sd_smallval <- get_model_coefs(Model1Medians_no3sd_smallval)
model2_coefs_no3sd_smallval <- get_model_coefs(Model2Medians_no3sd_smallval)
model3_coefs_no3sd_smallval <- get_model_coefs(Model3Medians_no3sd_smallval)
model1_coefs_no3sd_smallval$p_value <- sapply(model1_coefs_no3sd_smallval$p_value, format_pvalue)
model2_coefs_no3sd_smallval$p_value <- sapply(model2_coefs_no3sd_smallval$p_value, format_pvalue)
model3_coefs_no3sd_smallval$p_value <- sapply(model3_coefs_no3sd_smallval$p_value, format_pvalue)
kable(model1_coefs_no3sd_smallval, caption = "Model 1 Coeffs (NO 3SD, Log Small Const)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model2_coefs_no3sd_smallval, caption = "Model 2 Coeffs (NO 3SD, Log Small Const)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model3_coefs_no3sd_smallval, caption = "Model 3 Coeffs (NO 3SD, Log Small Const)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
# Print model summaries
cat("\nModel Summaries (NO 3SD, Log Small Constant):\n")
print(summary(Model1Medians_no3sd_smallval))
print(summary(Model2Medians_no3sd_smallval))
print(summary(Model3Medians_no3sd_smallval))
} else {
cat("Skipping plots and summaries for (NO 3SD, Log Small Constant) due to insufficient data.\n")
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##
## Model Summaries (NO 3SD, Log Small Constant):
##
## Call:
## lm(formula = Reasonable ~ Average, data = dataforAICMedians_no3sd_smallval_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.90856 -0.36496 -0.02592 0.62745 1.22985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01139 0.29154 0.039 0.969
## Average 0.83670 0.09022 9.274 1.88e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8385 on 31 degrees of freedom
## Multiple R-squared: 0.7351, Adjusted R-squared: 0.7265
## F-statistic: 86.01 on 1 and 31 DF, p-value: 1.878e-10
##
##
## Call:
## lm(formula = Reasonable ~ Ideal, data = dataforAICMedians_no3sd_smallval_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3866 -0.6934 -0.1773 0.3218 2.5365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7771 0.2080 8.544 1.19e-09 ***
## Ideal 0.4367 0.0689 6.337 4.72e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.075 on 31 degrees of freedom
## Multiple R-squared: 0.5644, Adjusted R-squared: 0.5503
## F-statistic: 40.16 on 1 and 31 DF, p-value: 4.715e-07
##
##
## Call:
## lm(formula = Reasonable ~ Average + Ideal, data = dataforAICMedians_no3sd_smallval_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78273 -0.27802 -0.06571 0.33604 0.80237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.16010 0.14273 1.122 0.271
## Average 0.64924 0.04773 13.601 2.29e-14 ***
## Ideal 0.28538 0.02843 10.038 4.19e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4083 on 30 degrees of freedom
## Multiple R-squared: 0.9392, Adjusted R-squared: 0.9352
## F-statistic: 231.8 on 2 and 30 DF, p-value: < 2.2e-16
# Store results
analysis_3_results_no3sd_smallval <- list(
medians_by_question = if(exists("medians_df_no3sd")) medians_df_no3sd else NULL,
log_medians_by_question = if(exists("log_medians_df_no3sd_smallval")) log_medians_df_no3sd_smallval else NULL,
regression_models = if(exists("Model1Medians_no3sd_smallval")) list(model1 = Model1Medians_no3sd_smallval, model2 = Model2Medians_no3sd_smallval, model3 = Model3Medians_no3sd_smallval) else NULL,
regression_summary = if(exists("regression_summary_medians_no3sd_smallval")) regression_summary_medians_no3sd_smallval else NULL,
aic_values = if(exists("AIC1Medians_no3sd_smallval")) c(AIC1 = AIC1Medians_no3sd_smallval, AIC2 = AIC2Medians_no3sd_smallval, AIC3 = AIC3Medians_no3sd_smallval) else NULL
)
# Check if models exist before plotting
if(exists("Model1Medians_no3sd_plusone") && exists("dataforAICMedians_no3sd_plusone_clean") && nrow(dataforAICMedians_no3sd_plusone_clean) > 0) {
# Average Median vs Reasonable Median
plot1_median_no3sd_plusone <- ggplot(dataforAICMedians_no3sd_plusone_clean, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Model 1 (NO 3SD, Log+1): Median Average vs Reasonable",
subtitle = paste("AIC =", round(AIC1Medians_no3sd_plusone, 2), ", R² =", round(r2_model1_median_no3sd_plusone, 3)),
x = "Log (+1) Median Average Rating", y = "Log (+1) Median Reasonable Rating") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Ideal Median vs Reasonable Median
plot2_median_no3sd_plusone <- ggplot(dataforAICMedians_no3sd_plusone_clean, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Model 2 (NO 3SD, Log+1): Median Ideal vs Reasonable",
subtitle = paste("AIC =", round(AIC2Medians_no3sd_plusone, 2), ", R² =", round(r2_model2_median_no3sd_plusone, 3)),
x = "Log (+1) Median Ideal Rating", y = "Log (+1) Median Reasonable Rating") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Average effect)
new_data_median_no3sd_plusone <- expand.grid( Average = seq(min(dataforAICMedians_no3sd_plusone_clean$Average, na.rm=TRUE), max(dataforAICMedians_no3sd_plusone_clean$Average, na.rm=TRUE), length.out = 100), Ideal = median(dataforAICMedians_no3sd_plusone_clean$Ideal, na.rm=TRUE) )
new_data_median_no3sd_plusone <- new_data_median_no3sd_plusone[is.finite(new_data_median_no3sd_plusone$Average) & is.finite(new_data_median_no3sd_plusone$Ideal),]
if(nrow(new_data_median_no3sd_plusone)>0){ new_data_median_no3sd_plusone$predicted_reasonable <- predict(Model3Medians_no3sd_plusone, newdata = new_data_median_no3sd_plusone) } else { new_data_median_no3sd_plusone <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric())}
plot3_median_no3sd_plusone <- ggplot(dataforAICMedians_no3sd_plusone_clean, aes(x = Average, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data_median_no3sd_plusone, aes(y = predicted_reasonable), color = "blue", size = 1) + labs(title = "Model 3 (NO 3SD, Log+1): Effect of Median Average", subtitle = paste("AIC =", round(AIC3Medians_no3sd_plusone, 2), ", R² =", round(r2_model3_median_no3sd_plusone, 3)), x = "Log (+1) Median Average Rating", y = "Log (+1) Median Reasonable Rating") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Ideal effect)
new_data2_median_no3sd_plusone <- expand.grid( Average = median(dataforAICMedians_no3sd_plusone_clean$Average, na.rm=TRUE), Ideal = seq(min(dataforAICMedians_no3sd_plusone_clean$Ideal, na.rm=TRUE), max(dataforAICMedians_no3sd_plusone_clean$Ideal, na.rm=TRUE), length.out = 100) )
new_data2_median_no3sd_plusone <- new_data2_median_no3sd_plusone[is.finite(new_data2_median_no3sd_plusone$Average) & is.finite(new_data2_median_no3sd_plusone$Ideal),]
if(nrow(new_data2_median_no3sd_plusone)>0){ new_data2_median_no3sd_plusone$predicted_reasonable <- predict(Model3Medians_no3sd_plusone, newdata = new_data2_median_no3sd_plusone) } else { new_data2_median_no3sd_plusone <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric())}
plot4_median_no3sd_plusone <- ggplot(dataforAICMedians_no3sd_plusone_clean, aes(x = Ideal, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data2_median_no3sd_plusone, aes(y = predicted_reasonable), color = "red", size = 1) + labs(title = "Model 3 (NO 3SD, Log+1): Effect of Median Ideal", subtitle = paste("AIC =", round(AIC3Medians_no3sd_plusone, 2), ", R² =", round(r2_model3_median_no3sd_plusone, 3)), x = "Log (+1) Median Ideal Rating", y = "Log (+1) Median Reasonable Rating") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Display plots
print(plot1_median_no3sd_plusone)
print(plot2_median_no3sd_plusone)
print(plot3_median_no3sd_plusone)
print(plot4_median_no3sd_plusone)
# Save plots
ggsave("model1_median_avg_reas_no3sd_plusone.png", plot1_median_no3sd_plusone, width = 8, height = 6, dpi = 300)
ggsave("model2_median_ideal_reas_no3sd_plusone.png", plot2_median_no3sd_plusone, width = 8, height = 6, dpi = 300)
ggsave("model3_median_avg_eff_no3sd_plusone.png", plot3_median_no3sd_plusone, width = 8, height = 6, dpi = 300)
ggsave("model3_median_ideal_eff_no3sd_plusone.png", plot4_median_no3sd_plusone, width = 8, height = 6, dpi = 300)
grid_plots_median_no3sd_plusone <- grid.arrange(plot1_median_no3sd_plusone, plot2_median_no3sd_plusone, plot3_median_no3sd_plusone, plot4_median_no3sd_plusone, ncol = 2)
ggsave("all_median_plots_no3sd_plusone.png", grid_plots_median_no3sd_plusone, width = 14, height = 10, dpi = 300)
# Correlation matrix
cor_data_no3sd_plusone <- dataforAICMedians_no3sd_plusone_clean[complete.cases(dataforAICMedians_no3sd_plusone_clean),]
if(nrow(cor_data_no3sd_plusone) > 1) {
cor_matrix_no3sd_plusone <- cor(cor_data_no3sd_plusone, use = "complete.obs")
corrplot(cor_matrix_no3sd_plusone, method = "color", type = "upper", addCoef.col = "black", tl.col = "black", tl.srt = 45, diag = FALSE, title = "Correlation Matrix (NO 3SD, Log Plus One)", mar = c(0,0,1,0))
} else { cat("Not enough data for correlation matrix (NO 3SD, Log Plus One).\n") }
# Coefficient tables
model1_coefs_no3sd_plusone <- get_model_coefs(Model1Medians_no3sd_plusone)
model2_coefs_no3sd_plusone <- get_model_coefs(Model2Medians_no3sd_plusone)
model3_coefs_no3sd_plusone <- get_model_coefs(Model3Medians_no3sd_plusone)
model1_coefs_no3sd_plusone$p_value <- sapply(model1_coefs_no3sd_plusone$p_value, format_pvalue)
model2_coefs_no3sd_plusone$p_value <- sapply(model2_coefs_no3sd_plusone$p_value, format_pvalue)
model3_coefs_no3sd_plusone$p_value <- sapply(model3_coefs_no3sd_plusone$p_value, format_pvalue)
kable(model1_coefs_no3sd_plusone, caption = "Model 1 Coeffs (NO 3SD, Log Plus One)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model2_coefs_no3sd_plusone, caption = "Model 2 Coeffs (NO 3SD, Log Plus One)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model3_coefs_no3sd_plusone, caption = "Model 3 Coeffs (NO 3SD, Log Plus One)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
# Print model summaries
cat("\nModel Summaries (NO 3SD, Log Plus One):\n")
print(summary(Model1Medians_no3sd_plusone))
print(summary(Model2Medians_no3sd_plusone))
print(summary(Model3Medians_no3sd_plusone))
} else {
cat("Skipping plots and summaries for (NO 3SD, Log Plus One) due to insufficient data.\n")
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##
## Model Summaries (NO 3SD, Log Plus One):
##
## Call:
## lm(formula = Reasonable ~ Average, data = dataforAICMedians_no3sd_plusone_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.83433 -0.32028 -0.05108 0.53872 1.23137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12437 0.28692 0.433 0.668
## Average 0.82165 0.08711 9.432 1.27e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7746 on 31 degrees of freedom
## Multiple R-squared: 0.7416, Adjusted R-squared: 0.7332
## F-statistic: 88.96 on 1 and 31 DF, p-value: 1.271e-10
##
##
## Call:
## lm(formula = Reasonable ~ Ideal, data = dataforAICMedians_no3sd_plusone_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6811 -0.4486 -0.1409 0.2681 1.5866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.81133 0.16383 4.952 2.46e-05 ***
## Ideal 0.82556 0.06204 13.306 2.34e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5882 on 31 degrees of freedom
## Multiple R-squared: 0.851, Adjusted R-squared: 0.8462
## F-statistic: 177 on 1 and 31 DF, p-value: 2.345e-14
##
##
## Call:
## lm(formula = Reasonable ~ Average + Ideal, data = dataforAICMedians_no3sd_plusone_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50094 -0.21285 0.00378 0.08933 0.78470
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.09186 0.09689 0.948 0.351
## Average 0.43408 0.03855 11.261 2.68e-12 ***
## Ideal 0.56239 0.03615 15.556 6.64e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2615 on 30 degrees of freedom
## Multiple R-squared: 0.9715, Adjusted R-squared: 0.9696
## F-statistic: 511.2 on 2 and 30 DF, p-value: < 2.2e-16
# Store results
analysis_3_results_no3sd_plusone <- list(
medians_by_question = if(exists("medians_df_no3sd")) medians_df_no3sd else NULL,
log_medians_by_question = if(exists("log_medians_df_no3sd_plusone")) log_medians_df_no3sd_plusone else NULL,
regression_models = if(exists("Model1Medians_no3sd_plusone")) list(model1 = Model1Medians_no3sd_plusone, model2 = Model2Medians_no3sd_plusone, model3 = Model3Medians_no3sd_plusone) else NULL,
regression_summary = if(exists("regression_summary_medians_no3sd_plusone")) regression_summary_medians_no3sd_plusone else NULL,
aic_values = if(exists("AIC1Medians_no3sd_plusone")) c(AIC1 = AIC1Medians_no3sd_plusone, AIC2 = AIC2Medians_no3sd_plusone, AIC3 = AIC3Medians_no3sd_plusone) else NULL
)
# Calculate medians from data filtered for attention check AND 3SD outliers
column_medians_average_3sd <- sapply(Data_All_Average_Filtered[, question_cols], median, na.rm = TRUE)
column_medians_ideal_3sd <- sapply(Data_All_Ideal_Filtered[, question_cols], median, na.rm = TRUE)
column_medians_reasonable_3sd <- sapply(Data_All_Reasonable_Filtered[, question_cols], median, na.rm = TRUE)
# Create a dataframe for the medians (WITH 3SD)
medians_df_3sd <- data.frame(
Question = names(column_medians_average_3sd),
Average = column_medians_average_3sd,
Ideal = column_medians_ideal_3sd,
Reasonable = column_medians_reasonable_3sd
)
# Display the medians for each question
kable(medians_df_3sd, caption = "Median Values (WITH 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 3.0 | 2 | 2 |
| q2 | q2 | 7.0 | 2 | 3 |
| q3 | q3 | 3.0 | 6 | 5 |
| q4 | q4 | 2000.0 | 2000 | 2000 |
| q5 | q5 | 20.0 | 30 | 30 |
| q6 | q6 | 10.0 | 1 | 3 |
| q7 | q7 | 15.0 | 5 | 10 |
| q8 | q8 | 5.0 | 12 | 10 |
| q9 | q9 | 5.0 | 3 | 5 |
| q10 | q10 | 5.0 | 0 | 2 |
| q11 | q11 | 500.0 | 0 | 7 |
| q12 | q12 | 30.0 | 0 | 10 |
| q13 | q13 | 40.0 | 10 | 15 |
| q14 | q14 | 10.0 | 2 | 5 |
| q15 | q15 | 8.0 | 10 | 8 |
| q16 | q16 | 4.0 | 5 | 4 |
| q17 | q17 | 3.0 | 0 | 1 |
| q18 | q18 | 10.0 | 1 | 5 |
| q20 | q20 | 20.0 | 0 | 2 |
| q21 | q21 | 8.0 | 5 | 5 |
| q22 | q22 | 7.0 | 7 | 7 |
| q23 | q23 | 2.0 | 2 | 3 |
| q24 | q24 | 15.0 | 24 | 24 |
| q25 | q25 | 2000.0 | 500 | 1000 |
| q26 | q26 | 8.0 | 2 | 4 |
| q27 | q27 | 11.5 | 4 | 6 |
| q28 | q28 | 10.0 | 20 | 20 |
| q29 | q29 | 54.5 | 90 | 80 |
| q30 | q30 | 12.0 | 3 | 4 |
| q31 | q31 | 50.0 | 10 | 20 |
| q32 | q32 | 24.0 | 48 | 40 |
| q33 | q33 | 5.0 | 2 | 3 |
| q34 | q34 | 50.0 | 5 | 50 |
# Define Log function: Small constant for zero
log_smallvalue <- function(x) {
x_safe <- x
# Replace only actual zeros, leave NAs as NA
x_safe[which(x == 0 & !is.na(x))] <- 0.01
log(x_safe)
}
# Apply the log function: log_smallvalue (defined earlier)
log_medians_average_3sd_smallval <- log_smallvalue(column_medians_average_3sd)
log_medians_ideal_3sd_smallval <- log_smallvalue(column_medians_ideal_3sd)
log_medians_reasonable_3sd_smallval <- log_smallvalue(column_medians_reasonable_3sd)
# Create a dataframe for the log medians (WITH 3SD, Small Constant)
log_medians_df_3sd_smallval <- data.frame(
Question = names(log_medians_average_3sd_smallval),
Average = log_medians_average_3sd_smallval,
Ideal = log_medians_ideal_3sd_smallval,
Reasonable = log_medians_reasonable_3sd_smallval
)
# Display the log medians
kable(log_medians_df_3sd_smallval, caption = "Log-Transformed (Small Constant) Median Values (WITH 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 1.0986123 | 0.6931472 | 0.6931472 |
| q2 | q2 | 1.9459101 | 0.6931472 | 1.0986123 |
| q3 | q3 | 1.0986123 | 1.7917595 | 1.6094379 |
| q4 | q4 | 7.6009025 | 7.6009025 | 7.6009025 |
| q5 | q5 | 2.9957323 | 3.4011974 | 3.4011974 |
| q6 | q6 | 2.3025851 | 0.0000000 | 1.0986123 |
| q7 | q7 | 2.7080502 | 1.6094379 | 2.3025851 |
| q8 | q8 | 1.6094379 | 2.4849066 | 2.3025851 |
| q9 | q9 | 1.6094379 | 1.0986123 | 1.6094379 |
| q10 | q10 | 1.6094379 | -4.6051702 | 0.6931472 |
| q11 | q11 | 6.2146081 | -4.6051702 | 1.9459101 |
| q12 | q12 | 3.4011974 | -4.6051702 | 2.3025851 |
| q13 | q13 | 3.6888795 | 2.3025851 | 2.7080502 |
| q14 | q14 | 2.3025851 | 0.6931472 | 1.6094379 |
| q15 | q15 | 2.0794415 | 2.3025851 | 2.0794415 |
| q16 | q16 | 1.3862944 | 1.6094379 | 1.3862944 |
| q17 | q17 | 1.0986123 | -4.6051702 | 0.0000000 |
| q18 | q18 | 2.3025851 | 0.0000000 | 1.6094379 |
| q20 | q20 | 2.9957323 | -4.6051702 | 0.6931472 |
| q21 | q21 | 2.0794415 | 1.6094379 | 1.6094379 |
| q22 | q22 | 1.9459101 | 1.9459101 | 1.9459101 |
| q23 | q23 | 0.6931472 | 0.6931472 | 1.0986123 |
| q24 | q24 | 2.7080502 | 3.1780538 | 3.1780538 |
| q25 | q25 | 7.6009025 | 6.2146081 | 6.9077553 |
| q26 | q26 | 2.0794415 | 0.6931472 | 1.3862944 |
| q27 | q27 | 2.4423470 | 1.3862944 | 1.7917595 |
| q28 | q28 | 2.3025851 | 2.9957323 | 2.9957323 |
| q29 | q29 | 3.9982007 | 4.4998097 | 4.3820266 |
| q30 | q30 | 2.4849066 | 1.0986123 | 1.3862944 |
| q31 | q31 | 3.9120230 | 2.3025851 | 2.9957323 |
| q32 | q32 | 3.1780538 | 3.8712010 | 3.6888795 |
| q33 | q33 | 1.6094379 | 0.6931472 | 1.0986123 |
| q34 | q34 | 3.9120230 | 1.6094379 | 3.9120230 |
# Define Log function: Add 1
log_plus_one <- function(x) {
log(x + 1)
}
# Apply the log function: log_plus_one (defined earlier)
log_medians_average_3sd_plusone <- log_plus_one(column_medians_average_3sd)
log_medians_ideal_3sd_plusone <- log_plus_one(column_medians_ideal_3sd)
log_medians_reasonable_3sd_plusone <- log_plus_one(column_medians_reasonable_3sd)
# Create a dataframe for the log medians (WITH 3SD, Plus One)
log_medians_df_3sd_plusone <- data.frame(
Question = names(log_medians_average_3sd_plusone),
Average = log_medians_average_3sd_plusone,
Ideal = log_medians_ideal_3sd_plusone,
Reasonable = log_medians_reasonable_3sd_plusone
)
# Display the log medians
kable(log_medians_df_3sd_plusone, caption = "Log-Transformed (Plus One) Median Values (WITH 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average | Ideal | Reasonable | |
|---|---|---|---|---|
| q1 | q1 | 1.386294 | 1.0986123 | 1.0986123 |
| q2 | q2 | 2.079442 | 1.0986123 | 1.3862944 |
| q3 | q3 | 1.386294 | 1.9459101 | 1.7917595 |
| q4 | q4 | 7.601402 | 7.6014023 | 7.6014023 |
| q5 | q5 | 3.044522 | 3.4339872 | 3.4339872 |
| q6 | q6 | 2.397895 | 0.6931472 | 1.3862944 |
| q7 | q7 | 2.772589 | 1.7917595 | 2.3978953 |
| q8 | q8 | 1.791759 | 2.5649494 | 2.3978953 |
| q9 | q9 | 1.791759 | 1.3862944 | 1.7917595 |
| q10 | q10 | 1.791759 | 0.0000000 | 1.0986123 |
| q11 | q11 | 6.216606 | 0.0000000 | 2.0794415 |
| q12 | q12 | 3.433987 | 0.0000000 | 2.3978953 |
| q13 | q13 | 3.713572 | 2.3978953 | 2.7725887 |
| q14 | q14 | 2.397895 | 1.0986123 | 1.7917595 |
| q15 | q15 | 2.197225 | 2.3978953 | 2.1972246 |
| q16 | q16 | 1.609438 | 1.7917595 | 1.6094379 |
| q17 | q17 | 1.386294 | 0.0000000 | 0.6931472 |
| q18 | q18 | 2.397895 | 0.6931472 | 1.7917595 |
| q20 | q20 | 3.044522 | 0.0000000 | 1.0986123 |
| q21 | q21 | 2.197225 | 1.7917595 | 1.7917595 |
| q22 | q22 | 2.079442 | 2.0794415 | 2.0794415 |
| q23 | q23 | 1.098612 | 1.0986123 | 1.3862944 |
| q24 | q24 | 2.772589 | 3.2188758 | 3.2188758 |
| q25 | q25 | 7.601402 | 6.2166061 | 6.9087548 |
| q26 | q26 | 2.197225 | 1.0986123 | 1.6094379 |
| q27 | q27 | 2.525729 | 1.6094379 | 1.9459101 |
| q28 | q28 | 2.397895 | 3.0445224 | 3.0445224 |
| q29 | q29 | 4.016383 | 4.5108595 | 4.3944492 |
| q30 | q30 | 2.564949 | 1.3862944 | 1.6094379 |
| q31 | q31 | 3.931826 | 2.3978953 | 3.0445224 |
| q32 | q32 | 3.218876 | 3.8918203 | 3.7135721 |
| q33 | q33 | 1.791759 | 1.0986123 | 1.3862944 |
| q34 | q34 | 3.931826 | 1.7917595 | 3.9318256 |
# Create data frame for regression (WITH 3SD, Small Constant)
dataforAICMedians_3sd_smallval <- data.frame(
Reasonable = log_medians_reasonable_3sd_smallval,
Average = log_medians_average_3sd_smallval,
Ideal = log_medians_ideal_3sd_smallval
)
# Filter out rows with non-finite values before regression
dataforAICMedians_3sd_smallval_clean <- dataforAICMedians_3sd_smallval %>%
filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check if enough data points remain
if(nrow(dataforAICMedians_3sd_smallval_clean) > 3) {
# Model I
Model1Medians_3sd_smallval <- lm(Reasonable ~ Average, data = dataforAICMedians_3sd_smallval_clean)
AIC1Medians_3sd_smallval <- AIC(Model1Medians_3sd_smallval)
r2_model1_median_3sd_smallval <- summary(Model1Medians_3sd_smallval)$r.squared
# Model II
Model2Medians_3sd_smallval <- lm(Reasonable ~ Ideal, data = dataforAICMedians_3sd_smallval_clean)
AIC2Medians_3sd_smallval <- AIC(Model2Medians_3sd_smallval)
r2_model2_median_3sd_smallval <- summary(Model2Medians_3sd_smallval)$r.squared
# Model III
Model3Medians_3sd_smallval <- lm(Reasonable ~ Average + Ideal, data = dataforAICMedians_3sd_smallval_clean)
AIC3Medians_3sd_smallval <- AIC(Model3Medians_3sd_smallval)
r2_model3_median_3sd_smallval <- summary(Model3Medians_3sd_smallval)$r.squared
# Create a regression summary dataframe
regression_summary_medians_3sd_smallval <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(AIC1Medians_3sd_smallval, AIC2Medians_3sd_smallval, AIC3Medians_3sd_smallval),
R_squared = c(r2_model1_median_3sd_smallval, r2_model2_median_3sd_smallval, r2_model3_median_3sd_smallval)
)
# Display the regression summary table
kable(regression_summary_medians_3sd_smallval, caption = "Regression Comparison (Median, WITH 3SD, Log Small Constant)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
} else {
cat("Not enough valid data points for regression (WITH 3SD, Log Small Constant).\n")
regression_summary_medians_3sd_smallval <- data.frame(Model=character(), AIC=numeric(), R_squared=numeric())
}
| Model | AIC | R_squared |
|---|---|---|
| Average predicts Reasonable | 92.68637 | 0.6876407 |
| Ideal predicts Reasonable | 105.28113 | 0.5424832 |
| Average + Ideal predict Reasonable | 50.95398 | 0.9169926 |
# Create data frame for regression (WITH 3SD, Plus One)
dataforAICMedians_3sd_plusone <- data.frame(
Reasonable = log_medians_reasonable_3sd_plusone,
Average = log_medians_average_3sd_plusone,
Ideal = log_medians_ideal_3sd_plusone
)
# Filter out rows with non-finite values before regression
dataforAICMedians_3sd_plusone_clean <- dataforAICMedians_3sd_plusone %>%
filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check if enough data points remain
if(nrow(dataforAICMedians_3sd_plusone_clean) > 3) {
# Model I
Model1Medians_3sd_plusone <- lm(Reasonable ~ Average, data = dataforAICMedians_3sd_plusone_clean)
AIC1Medians_3sd_plusone <- AIC(Model1Medians_3sd_plusone)
r2_model1_median_3sd_plusone <- summary(Model1Medians_3sd_plusone)$r.squared
# Model II
Model2Medians_3sd_plusone <- lm(Reasonable ~ Ideal, data = dataforAICMedians_3sd_plusone_clean)
AIC2Medians_3sd_plusone <- AIC(Model2Medians_3sd_plusone)
r2_model2_median_3sd_plusone <- summary(Model2Medians_3sd_plusone)$r.squared
# Model III
Model3Medians_3sd_plusone <- lm(Reasonable ~ Average + Ideal, data = dataforAICMedians_3sd_plusone_clean)
AIC3Medians_3sd_plusone <- AIC(Model3Medians_3sd_plusone)
r2_model3_median_3sd_plusone <- summary(Model3Medians_3sd_plusone)$r.squared
# Create a regression summary dataframe
regression_summary_medians_3sd_plusone <- data.frame(
Model = c("Average predicts Reasonable",
"Ideal predicts Reasonable",
"Average + Ideal predict Reasonable"),
AIC = c(AIC1Medians_3sd_plusone, AIC2Medians_3sd_plusone, AIC3Medians_3sd_plusone),
R_squared = c(r2_model1_median_3sd_plusone, r2_model2_median_3sd_plusone, r2_model3_median_3sd_plusone)
)
# Display the regression summary table
kable(regression_summary_medians_3sd_plusone, caption = "Regression Comparison (Median, WITH 3SD, Log Plus One)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
} else {
cat("Not enough valid data points for regression (WITH 3SD, Log Plus One).\n")
regression_summary_medians_3sd_plusone <- data.frame(Model=character(), AIC=numeric(), R_squared=numeric())
}
| Model | AIC | R_squared |
|---|---|---|
| Average predicts Reasonable | 86.73858 | 0.6998228 |
| Ideal predicts Reasonable | 65.11702 | 0.8441063 |
| Average + Ideal predict Reasonable | 25.78123 | 0.9554517 |
# Check if models exist before plotting
if(exists("Model1Medians_3sd_smallval") && exists("dataforAICMedians_3sd_smallval_clean") && nrow(dataforAICMedians_3sd_smallval_clean) > 0) {
# Average Median vs Reasonable Median
plot1_median_3sd_smallval <- ggplot(dataforAICMedians_3sd_smallval_clean, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Model 1 (WITH 3SD, Log Small Const): Median Average vs Reasonable",
subtitle = paste("AIC =", round(AIC1Medians_3sd_smallval, 2), ", R² =", round(r2_model1_median_3sd_smallval, 3)),
x = "Log (Small Const) Median Average Rating (3SD Removed)",
y = "Log (Small Const) Median Reasonable Rating (3SD Removed)") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Ideal Median vs Reasonable Median
plot2_median_3sd_smallval <- ggplot(dataforAICMedians_3sd_smallval_clean, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Model 2 (WITH 3SD, Log Small Const): Median Ideal vs Reasonable",
subtitle = paste("AIC =", round(AIC2Medians_3sd_smallval, 2), ", R² =", round(r2_model2_median_3sd_smallval, 3)),
x = "Log (Small Const) Median Ideal Rating (3SD Removed)",
y = "Log (Small Const) Median Reasonable Rating (3SD Removed)") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Average effect)
new_data_median_3sd_smallval <- expand.grid( Average = seq(min(dataforAICMedians_3sd_smallval_clean$Average, na.rm=TRUE), max(dataforAICMedians_3sd_smallval_clean$Average, na.rm=TRUE), length.out = 100), Ideal = median(dataforAICMedians_3sd_smallval_clean$Ideal, na.rm=TRUE) )
new_data_median_3sd_smallval <- new_data_median_3sd_smallval[is.finite(new_data_median_3sd_smallval$Average) & is.finite(new_data_median_3sd_smallval$Ideal),]
if(nrow(new_data_median_3sd_smallval)>0){ new_data_median_3sd_smallval$predicted_reasonable <- predict(Model3Medians_3sd_smallval, newdata = new_data_median_3sd_smallval) } else { new_data_median_3sd_smallval <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric()) }
plot3_median_3sd_smallval <- ggplot(dataforAICMedians_3sd_smallval_clean, aes(x = Average, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data_median_3sd_smallval, aes(y = predicted_reasonable), color = "blue", size = 1) + labs(title = "Model 3 (WITH 3SD, Log Small Const): Effect of Median Average", subtitle = paste("AIC =", round(AIC3Medians_3sd_smallval, 2), ", R² =", round(r2_model3_median_3sd_smallval, 3)), x = "Log (Small Const) Median Average Rating (3SD Removed)", y = "Log (Small Const) Median Reasonable Rating (3SD Removed)") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Ideal effect)
new_data2_median_3sd_smallval <- expand.grid( Average = median(dataforAICMedians_3sd_smallval_clean$Average, na.rm=TRUE), Ideal = seq(min(dataforAICMedians_3sd_smallval_clean$Ideal, na.rm=TRUE), max(dataforAICMedians_3sd_smallval_clean$Ideal, na.rm=TRUE), length.out = 100) )
new_data2_median_3sd_smallval <- new_data2_median_3sd_smallval[is.finite(new_data2_median_3sd_smallval$Average) & is.finite(new_data2_median_3sd_smallval$Ideal),]
if(nrow(new_data2_median_3sd_smallval)>0){ new_data2_median_3sd_smallval$predicted_reasonable <- predict(Model3Medians_3sd_smallval, newdata = new_data2_median_3sd_smallval) } else { new_data2_median_3sd_smallval <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric()) }
plot4_median_3sd_smallval <- ggplot(dataforAICMedians_3sd_smallval_clean, aes(x = Ideal, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data2_median_3sd_smallval, aes(y = predicted_reasonable), color = "red", size = 1) + labs(title = "Model 3 (WITH 3SD, Log Small Const): Effect of Median Ideal", subtitle = paste("AIC =", round(AIC3Medians_3sd_smallval, 2), ", R² =", round(r2_model3_median_3sd_smallval, 3)), x = "Log (Small Const) Median Ideal Rating (3SD Removed)", y = "Log (Small Const) Median Reasonable Rating (3SD Removed)") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Display plots
print(plot1_median_3sd_smallval)
print(plot2_median_3sd_smallval)
print(plot3_median_3sd_smallval)
print(plot4_median_3sd_smallval)
# Save plots
ggsave("model1_median_avg_reas_3sd_smallval.png", plot1_median_3sd_smallval, width = 8, height = 6, dpi = 300)
ggsave("model2_median_ideal_reas_3sd_smallval.png", plot2_median_3sd_smallval, width = 8, height = 6, dpi = 300)
ggsave("model3_median_avg_eff_3sd_smallval.png", plot3_median_3sd_smallval, width = 8, height = 6, dpi = 300)
ggsave("model3_median_ideal_eff_3sd_smallval.png", plot4_median_3sd_smallval, width = 8, height = 6, dpi = 300)
grid_plots_median_3sd_smallval <- grid.arrange(plot1_median_3sd_smallval, plot2_median_3sd_smallval, plot3_median_3sd_smallval, plot4_median_3sd_smallval, ncol = 2)
ggsave("all_median_plots_3sd_smallval.png", grid_plots_median_3sd_smallval, width = 14, height = 10, dpi = 300)
# Correlation matrix
cor_data_3sd_smallval <- dataforAICMedians_3sd_smallval_clean[complete.cases(dataforAICMedians_3sd_smallval_clean),]
if(nrow(cor_data_3sd_smallval) > 1) {
cor_matrix_3sd_smallval <- cor(cor_data_3sd_smallval, use = "complete.obs")
corrplot(cor_matrix_3sd_smallval, method = "color", type = "upper", addCoef.col = "black", tl.col = "black", tl.srt = 45, diag = FALSE, title = "Correlation Matrix (WITH 3SD, Log Small Constant)", mar = c(0,0,1,0))
} else { cat("Not enough data for correlation matrix (WITH 3SD, Log Small Constant).\n") }
# Coefficient tables
model1_coefs_3sd_smallval <- get_model_coefs(Model1Medians_3sd_smallval)
model2_coefs_3sd_smallval <- get_model_coefs(Model2Medians_3sd_smallval)
model3_coefs_3sd_smallval <- get_model_coefs(Model3Medians_3sd_smallval)
model1_coefs_3sd_smallval$p_value <- sapply(model1_coefs_3sd_smallval$p_value, format_pvalue)
model2_coefs_3sd_smallval$p_value <- sapply(model2_coefs_3sd_smallval$p_value, format_pvalue)
model3_coefs_3sd_smallval$p_value <- sapply(model3_coefs_3sd_smallval$p_value, format_pvalue)
kable(model1_coefs_3sd_smallval, caption = "Model 1 Coeffs (WITH 3SD, Log Small Const)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model2_coefs_3sd_smallval, caption = "Model 2 Coeffs (WITH 3SD, Log Small Const)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model3_coefs_3sd_smallval, caption = "Model 3 Coeffs (WITH 3SD, Log Small Const)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
# Print model summaries
cat("\nModel Summaries (WITH 3SD, Log Small Constant):\n")
print(summary(Model1Medians_3sd_smallval))
print(summary(Model2Medians_3sd_smallval))
print(summary(Model3Medians_3sd_smallval))
} else {
cat("Skipping plots and summaries for (WITH 3SD, Log Small Constant) due to insufficient data.\n")
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##
## Model Summaries (WITH 3SD, Log Small Constant):
##
## Call:
## lm(formula = Reasonable ~ Average, data = dataforAICMedians_3sd_smallval_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1568 -0.3358 -0.1127 0.6892 1.3648
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02213 0.31715 0.070 0.945
## Average 0.81753 0.09896 8.261 2.49e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9284 on 31 degrees of freedom
## Multiple R-squared: 0.6876, Adjusted R-squared: 0.6776
## F-statistic: 68.24 on 1 and 31 DF, p-value: 2.486e-09
##
##
## Call:
## lm(formula = Reasonable ~ Ideal, data = dataforAICMedians_3sd_smallval_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4195 -0.7295 -0.2187 0.2721 2.6536
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.82816 0.20911 8.743 7.16e-10 ***
## Ideal 0.41036 0.06769 6.063 1.03e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.124 on 31 degrees of freedom
## Multiple R-squared: 0.5425, Adjusted R-squared: 0.5277
## F-statistic: 36.76 on 1 and 31 DF, p-value: 1.027e-06
##
##
## Call:
## lm(formula = Reasonable ~ Average + Ideal, data = dataforAICMedians_3sd_smallval_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.93341 -0.36561 -0.06373 0.34015 1.23273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.19225 0.16724 1.150 0.259
## Average 0.64316 0.05528 11.634 1.20e-12 ***
## Ideal 0.28444 0.03124 9.104 3.88e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4865 on 30 degrees of freedom
## Multiple R-squared: 0.917, Adjusted R-squared: 0.9115
## F-statistic: 165.7 on 2 and 30 DF, p-value: < 2.2e-16
# Store results
analysis_3_results_3sd_smallval <- list(
medians_by_question = if(exists("medians_df_3sd")) medians_df_3sd else NULL,
log_medians_by_question = if(exists("log_medians_df_3sd_smallval")) log_medians_df_3sd_smallval else NULL,
regression_models = if(exists("Model1Medians_3sd_smallval")) list(model1 = Model1Medians_3sd_smallval, model2 = Model2Medians_3sd_smallval, model3 = Model3Medians_3sd_smallval) else NULL,
regression_summary = if(exists("regression_summary_medians_3sd_smallval")) regression_summary_medians_3sd_smallval else NULL,
aic_values = if(exists("AIC1Medians_3sd_smallval")) c(AIC1 = AIC1Medians_3sd_smallval, AIC2 = AIC2Medians_3sd_smallval, AIC3 = AIC3Medians_3sd_smallval) else NULL
)
# Check if models exist before plotting
if(exists("Model1Medians_3sd_plusone") && exists("dataforAICMedians_3sd_plusone_clean") && nrow(dataforAICMedians_3sd_plusone_clean) > 0) {
# Average Median vs Reasonable Median
plot1_median_3sd_plusone <- ggplot(dataforAICMedians_3sd_plusone_clean, aes(x = Average, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Model 1 (WITH 3SD, Log+1): Median Average vs Reasonable",
subtitle = paste("AIC =", round(AIC1Medians_3sd_plusone, 2), ", R² =", round(r2_model1_median_3sd_plusone, 3)),
x = "Log (+1) Median Average Rating (3SD Removed)",
y = "Log (+1) Median Reasonable Rating (3SD Removed)") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Ideal Median vs Reasonable Median
plot2_median_3sd_plusone <- ggplot(dataforAICMedians_3sd_plusone_clean, aes(x = Ideal, y = Reasonable)) +
geom_point(size = 3, alpha = 0.7) + geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(title = "Model 2 (WITH 3SD, Log+1): Median Ideal vs Reasonable",
subtitle = paste("AIC =", round(AIC2Medians_3sd_plusone, 2), ", R² =", round(r2_model2_median_3sd_plusone, 3)),
x = "Log (+1) Median Ideal Rating (3SD Removed)",
y = "Log (+1) Median Reasonable Rating (3SD Removed)") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Average effect)
new_data_median_3sd_plusone <- expand.grid( Average = seq(min(dataforAICMedians_3sd_plusone_clean$Average, na.rm=TRUE), max(dataforAICMedians_3sd_plusone_clean$Average, na.rm=TRUE), length.out = 100), Ideal = median(dataforAICMedians_3sd_plusone_clean$Ideal, na.rm=TRUE) )
new_data_median_3sd_plusone <- new_data_median_3sd_plusone[is.finite(new_data_median_3sd_plusone$Average) & is.finite(new_data_median_3sd_plusone$Ideal),]
if(nrow(new_data_median_3sd_plusone)>0){ new_data_median_3sd_plusone$predicted_reasonable <- predict(Model3Medians_3sd_plusone, newdata = new_data_median_3sd_plusone) } else { new_data_median_3sd_plusone <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric()) }
plot3_median_3sd_plusone <- ggplot(dataforAICMedians_3sd_plusone_clean, aes(x = Average, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data_median_3sd_plusone, aes(y = predicted_reasonable), color = "blue", size = 1) + labs(title = "Model 3 (WITH 3SD, Log+1): Effect of Median Average", subtitle = paste("AIC =", round(AIC3Medians_3sd_plusone, 2), ", R² =", round(r2_model3_median_3sd_plusone, 3)), x = "Log (+1) Median Average Rating (3SD Removed)", y = "Log (+1) Median Reasonable Rating (3SD Removed)") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Model 3 (Ideal effect)
new_data2_median_3sd_plusone <- expand.grid( Average = median(dataforAICMedians_3sd_plusone_clean$Average, na.rm=TRUE), Ideal = seq(min(dataforAICMedians_3sd_plusone_clean$Ideal, na.rm=TRUE), max(dataforAICMedians_3sd_plusone_clean$Ideal, na.rm=TRUE), length.out = 100) )
new_data2_median_3sd_plusone <- new_data2_median_3sd_plusone[is.finite(new_data2_median_3sd_plusone$Average) & is.finite(new_data2_median_3sd_plusone$Ideal),]
if(nrow(new_data2_median_3sd_plusone)>0){ new_data2_median_3sd_plusone$predicted_reasonable <- predict(Model3Medians_3sd_plusone, newdata = new_data2_median_3sd_plusone) } else { new_data2_median_3sd_plusone <- data.frame(Average=numeric(), Ideal=numeric(), predicted_reasonable=numeric()) }
plot4_median_3sd_plusone <- ggplot(dataforAICMedians_3sd_plusone_clean, aes(x = Ideal, y = Reasonable)) + geom_point(size = 3, alpha = 0.5) + geom_line(data = new_data2_median_3sd_plusone, aes(y = predicted_reasonable), color = "red", size = 1) + labs(title = "Model 3 (WITH 3SD, Log+1): Effect of Median Ideal", subtitle = paste("AIC =", round(AIC3Medians_3sd_plusone, 2), ", R² =", round(r2_model3_median_3sd_plusone, 3)), x = "Log (+1) Median Ideal Rating (3SD Removed)", y = "Log (+1) Median Reasonable Rating (3SD Removed)") + theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9) )
# Display plots
print(plot1_median_3sd_plusone)
print(plot2_median_3sd_plusone)
print(plot3_median_3sd_plusone)
print(plot4_median_3sd_plusone)
# Save plots
ggsave("model1_median_avg_reas_3sd_plusone.png", plot1_median_3sd_plusone, width = 8, height = 6, dpi = 300)
ggsave("model2_median_ideal_reas_3sd_plusone.png", plot2_median_3sd_plusone, width = 8, height = 6, dpi = 300)
ggsave("model3_median_avg_eff_3sd_plusone.png", plot3_median_3sd_plusone, width = 8, height = 6, dpi = 300)
ggsave("model3_median_ideal_eff_3sd_plusone.png", plot4_median_3sd_plusone, width = 8, height = 6, dpi = 300)
grid_plots_median_3sd_plusone <- grid.arrange(plot1_median_3sd_plusone, plot2_median_3sd_plusone, plot3_median_3sd_plusone, plot4_median_3sd_plusone, ncol = 2)
ggsave("all_median_plots_3sd_plusone.png", grid_plots_median_3sd_plusone, width = 14, height = 10, dpi = 300)
# Correlation matrix
cor_data_3sd_plusone <- dataforAICMedians_3sd_plusone_clean[complete.cases(dataforAICMedians_3sd_plusone_clean),]
if(nrow(cor_data_3sd_plusone) > 1) {
cor_matrix_3sd_plusone <- cor(cor_data_3sd_plusone, use = "complete.obs")
corrplot(cor_matrix_3sd_plusone, method = "color", type = "upper", addCoef.col = "black", tl.col = "black", tl.srt = 45, diag = FALSE, title = "Correlation Matrix (WITH 3SD, Log Plus One)", mar = c(0,0,1,0))
} else { cat("Not enough data for correlation matrix (WITH 3SD, Log Plus One).\n") }
# Coefficient tables
model1_coefs_3sd_plusone <- get_model_coefs(Model1Medians_3sd_plusone)
model2_coefs_3sd_plusone <- get_model_coefs(Model2Medians_3sd_plusone)
model3_coefs_3sd_plusone <- get_model_coefs(Model3Medians_3sd_plusone)
model1_coefs_3sd_plusone$p_value <- sapply(model1_coefs_3sd_plusone$p_value, format_pvalue)
model2_coefs_3sd_plusone$p_value <- sapply(model2_coefs_3sd_plusone$p_value, format_pvalue)
model3_coefs_3sd_plusone$p_value <- sapply(model3_coefs_3sd_plusone$p_value, format_pvalue)
kable(model1_coefs_3sd_plusone, caption = "Model 1 Coeffs (WITH 3SD, Log Plus One)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model2_coefs_3sd_plusone, caption = "Model 2 Coeffs (WITH 3SD, Log Plus One)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
kable(model3_coefs_3sd_plusone, caption = "Model 3 Coeffs (WITH 3SD, Log Plus One)") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
# Print model summaries
cat("\nModel Summaries (WITH 3SD, Log Plus One):\n")
print(summary(Model1Medians_3sd_plusone))
print(summary(Model2Medians_3sd_plusone))
print(summary(Model3Medians_3sd_plusone))
} else {
cat("Skipping plots and summaries for (WITH 3SD, Log Plus One) due to insufficient data.\n")
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##
## Model Summaries (WITH 3SD, Log Plus One):
##
## Call:
## lm(formula = Reasonable ~ Average, data = dataforAICMedians_3sd_plusone_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0611 -0.3553 -0.1168 0.6285 1.3474
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.14186 0.30917 0.459 0.65
## Average 0.80408 0.09458 8.501 1.33e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8484 on 31 degrees of freedom
## Multiple R-squared: 0.6998, Adjusted R-squared: 0.6901
## F-statistic: 72.27 on 1 and 31 DF, p-value: 1.331e-09
##
##
## Call:
## lm(formula = Reasonable ~ Ideal, data = dataforAICMedians_3sd_plusone_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6903 -0.3466 -0.1764 0.2491 1.6321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.83447 0.16400 5.088 1.67e-05 ***
## Ideal 0.81779 0.06312 12.956 4.74e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6114 on 31 degrees of freedom
## Multiple R-squared: 0.8441, Adjusted R-squared: 0.8391
## F-statistic: 167.9 on 1 and 31 DF, p-value: 4.742e-14
##
##
## Call:
## lm(formula = Reasonable ~ Average + Ideal, data = dataforAICMedians_3sd_plusone_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60695 -0.21446 -0.02932 0.09051 1.15093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12470 0.12108 1.030 0.311
## Average 0.41207 0.04759 8.659 1.17e-09 ***
## Ideal 0.57820 0.04407 13.120 5.81e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3322 on 30 degrees of freedom
## Multiple R-squared: 0.9555, Adjusted R-squared: 0.9525
## F-statistic: 321.7 on 2 and 30 DF, p-value: < 2.2e-16
# Store results
analysis_3_results_3sd_plusone <- list(
medians_by_question = if(exists("medians_df_3sd")) medians_df_3sd else NULL,
log_medians_by_question = if(exists("log_medians_df_3sd_plusone")) log_medians_df_3sd_plusone else NULL,
regression_models = if(exists("Model1Medians_3sd_plusone")) list(model1 = Model1Medians_3sd_plusone, model2 = Model2Medians_3sd_plusone, model3 = Model3Medians_3sd_plusone) else NULL,
regression_summary = if(exists("regression_summary_medians_3sd_plusone")) regression_summary_medians_3sd_plusone else NULL,
aic_values = if(exists("AIC1Medians_3sd_plusone")) c(AIC1 = AIC1Medians_3sd_plusone, AIC2 = AIC2Medians_3sd_plusone, AIC3 = AIC3Medians_3sd_plusone) else NULL
)
Step 1 and Step 2 have been completed in the earlier analysis.
# Function to check if reasonable is on the "ideal side" of average
# Equal median ratings are treated as being on the predicted side
is_ideal_side_median <- function(average, ideal, reasonable) {
if (any(is.na(c(average, ideal, reasonable)))) {
return(NA)
}
# Check if distance to ideal is less than or equal to distance to average
ifelse(abs(reasonable - ideal) <= abs(reasonable - average), 1, 0)
}
# Function to check if reasonable is on the "average side" of ideal
# Equal median ratings are treated as being on the predicted side
is_average_side_median <- function(average, ideal, reasonable) {
if (any(is.na(c(average, ideal, reasonable)))) {
return(NA)
}
# Check if distance to average is less than or equal to distance to ideal
ifelse(abs(reasonable - average) <= abs(reasonable - ideal), 1, 0)
}
# Function to check if reasonable falls between average and ideal (inclusive)
# Handles cases where average == ideal
is_both_sides_median <- function(average, ideal, reasonable) {
if (any(is.na(c(average, ideal, reasonable)))) {
return(NA)
}
# Check if reasonable is numerically between average and ideal, inclusive
# This works regardless of whether average > ideal or ideal > average
ifelse((reasonable >= min(average, ideal)) & (reasonable <= max(average, ideal)), 1, 0)
}
# --- Calculate all vectors ---
ideal_side_vector_median_no3sd <- mapply(is_ideal_side_median, column_medians_average_no3sd, column_medians_ideal_no3sd, column_medians_reasonable_no3sd)
average_side_vector_median_no3sd <- mapply(is_average_side_median, column_medians_average_no3sd, column_medians_ideal_no3sd, column_medians_reasonable_no3sd)
both_sides_vector_median_no3sd <- mapply(is_both_sides_median, column_medians_average_no3sd, column_medians_ideal_no3sd, column_medians_reasonable_no3sd)
# --- Calculate counts and valid Ns ---
count_ideal_side_median_no3sd <- sum(ideal_side_vector_median_no3sd, na.rm = TRUE)
valid_questions_ideal_no3sd <- sum(!is.na(ideal_side_vector_median_no3sd))
count_average_side_median_no3sd <- sum(average_side_vector_median_no3sd, na.rm = TRUE)
valid_questions_avg_no3sd <- sum(!is.na(average_side_vector_median_no3sd))
count_both_sides_median_no3sd <- sum(both_sides_vector_median_no3sd, na.rm = TRUE)
valid_questions_both_no3sd <- sum(!is.na(both_sides_vector_median_no3sd))
# --- Perform binomial tests ---
p_val_ideal_no3sd <- NA; binomial_result_ideal_median_no3sd <- NULL
if (valid_questions_ideal_no3sd > 0) {
binomial_result_ideal_median_no3sd <- binom.test(count_ideal_side_median_no3sd, valid_questions_ideal_no3sd, p = 0.5, alternative = "greater")
p_val_ideal_no3sd <- binomial_result_ideal_median_no3sd$p.value
}
p_val_avg_no3sd <- NA; binomial_result_avg_side_median_no3sd <- NULL
if (valid_questions_avg_no3sd > 0) {
binomial_result_avg_side_median_no3sd <- binom.test(count_average_side_median_no3sd, valid_questions_avg_no3sd, p = 0.5, alternative = "greater")
p_val_avg_no3sd <- binomial_result_avg_side_median_no3sd$p.value
}
p_val_both_no3sd <- NA; binomial_result_both_sides_median_no3sd <- NULL
if (valid_questions_both_no3sd > 0) {
binomial_result_both_sides_median_no3sd <- binom.test(count_both_sides_median_no3sd, valid_questions_both_no3sd, p = 1/3, alternative = "two.sided")
p_val_both_no3sd <- binomial_result_both_sides_median_no3sd$p.value
}
# --- Create ONE combined summary table ---
intermediacy_summary_no3sd <- data.frame(
Test = c("On Ideal Side of Average", "On Average Side of Ideal", "Between Average and Ideal"),
Count = c(count_ideal_side_median_no3sd, count_average_side_median_no3sd, count_both_sides_median_no3sd),
Total_Valid = c(valid_questions_ideal_no3sd, valid_questions_avg_no3sd, valid_questions_both_no3sd),
Proportion = c(ifelse(valid_questions_ideal_no3sd > 0, count_ideal_side_median_no3sd / valid_questions_ideal_no3sd, NA),
ifelse(valid_questions_avg_no3sd > 0, count_average_side_median_no3sd / valid_questions_avg_no3sd, NA),
ifelse(valid_questions_both_no3sd > 0, count_both_sides_median_no3sd / valid_questions_both_no3sd, NA)),
Expected_Prop = c(0.5, 0.5, 1/3),
p_value = c(p_val_ideal_no3sd, p_val_avg_no3sd, p_val_both_no3sd)
)
# Format proportions and p-values
intermediacy_summary_no3sd$Proportion <- scales::percent(intermediacy_summary_no3sd$Proportion, accuracy = 0.1)
intermediacy_summary_no3sd$Expected_Prop <- scales::percent(intermediacy_summary_no3sd$Expected_Prop, accuracy = 0.1)
intermediacy_summary_no3sd$p_value <- sapply(intermediacy_summary_no3sd$p_value, format_pvalue)
# Display the summary table
kable(intermediacy_summary_no3sd[,c("Test", "Count", "Total_Valid", "Proportion", "Expected_Prop", "p_value")],
caption = "Intermediacy Binomial Tests (NO 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Test | Count | Total_Valid | Proportion | Expected_Prop | p_value |
|---|---|---|---|---|---|
| On Ideal Side of Average | 29 | 33 | 87.9% | 50.0% | < .001 |
| On Average Side of Ideal | 8 | 33 | 24.2% | 50.0% | .999 |
| Between Average and Ideal | 31 | 33 | 93.9% | 33.3% | < .001 |
# Create a table showing the results for each question
question_position_summary_median_no3sd <- data.frame(
Question = names(column_medians_average_no3sd),
Average_Value = column_medians_average_no3sd,
Ideal_Value = column_medians_ideal_no3sd,
Reasonable_Value = column_medians_reasonable_no3sd,
On_Ideal_Side = ideal_side_vector_median_no3sd,
On_Average_Side = average_side_vector_median_no3sd,
Between_Average_And_Ideal = both_sides_vector_median_no3sd
)
# Handle NAs and Categorize
question_position_summary_median_no3sd <- question_position_summary_median_no3sd %>%
mutate(
On_Ideal_Side = ifelse(is.na(On_Ideal_Side), 0, On_Ideal_Side),
On_Average_Side = ifelse(is.na(On_Average_Side), 0, On_Average_Side),
Between_Average_And_Ideal = ifelse(is.na(Between_Average_And_Ideal), 0, Between_Average_And_Ideal)
)
question_position_summary_median_no3sd$Position <- "Other/NA"
question_position_summary_median_no3sd$Position[question_position_summary_median_no3sd$Between_Average_And_Ideal == 1] <- "Between Average and Ideal"
question_position_summary_median_no3sd$Position[question_position_summary_median_no3sd$Between_Average_And_Ideal == 0 & question_position_summary_median_no3sd$On_Ideal_Side == 1 & question_position_summary_median_no3sd$On_Average_Side == 0] <- "Beyond Ideal"
question_position_summary_median_no3sd$Position[question_position_summary_median_no3sd$Between_Average_And_Ideal == 0 & question_position_summary_median_no3sd$On_Average_Side == 1 & question_position_summary_median_no3sd$On_Ideal_Side == 0] <- "Beyond Average"
question_position_summary_median_no3sd$Position[
!is.na(question_position_summary_median_no3sd$Average_Value) &
!is.na(question_position_summary_median_no3sd$Ideal_Value) &
question_position_summary_median_no3sd$Average_Value == question_position_summary_median_no3sd$Ideal_Value] <- "Average = Ideal"
# Display the question position summary
kable(question_position_summary_median_no3sd[, c("Question", "Average_Value", "Ideal_Value", "Reasonable_Value", "Position")],
caption = "Position of Median Reasonable Rating (NO 3SD Removal) Relative to Average and Ideal") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average_Value | Ideal_Value | Reasonable_Value | Position | |
|---|---|---|---|---|---|
| q1 | q1 | 3.0 | 2.0 | 2.0 | Between Average and Ideal |
| q2 | q2 | 7.0 | 2.0 | 4.0 | Between Average and Ideal |
| q3 | q3 | 3.0 | 6.0 | 5.0 | Between Average and Ideal |
| q4 | q4 | 2000.0 | 2000.0 | 2000.0 | Average = Ideal |
| q5 | q5 | 20.0 | 30.0 | 30.0 | Between Average and Ideal |
| q6 | q6 | 10.0 | 1.0 | 3.0 | Between Average and Ideal |
| q7 | q7 | 15.0 | 5.0 | 10.0 | Between Average and Ideal |
| q8 | q8 | 5.0 | 12.0 | 10.0 | Between Average and Ideal |
| q9 | q9 | 5.0 | 3.0 | 5.0 | Between Average and Ideal |
| q10 | q10 | 5.0 | 0.0 | 2.0 | Between Average and Ideal |
| q11 | q11 | 500.0 | 0.0 | 10.0 | Between Average and Ideal |
| q12 | q12 | 30.0 | 1.0 | 10.0 | Between Average and Ideal |
| q13 | q13 | 40.0 | 10.0 | 15.0 | Between Average and Ideal |
| q14 | q14 | 10.0 | 2.0 | 5.0 | Between Average and Ideal |
| q15 | q15 | 9.0 | 10.0 | 8.0 | Beyond Average |
| q16 | q16 | 5.0 | 5.0 | 4.0 | Average = Ideal |
| q17 | q17 | 3.0 | 0.0 | 1.0 | Between Average and Ideal |
| q18 | q18 | 15.0 | 2.0 | 9.5 | Between Average and Ideal |
| q20 | q20 | 20.0 | 0.0 | 4.0 | Between Average and Ideal |
| q21 | q21 | 8.0 | 5.0 | 5.0 | Between Average and Ideal |
| q22 | q22 | 10.0 | 7.0 | 7.0 | Between Average and Ideal |
| q23 | q23 | 2.0 | 3.0 | 3.0 | Between Average and Ideal |
| q24 | q24 | 16.0 | 24.0 | 24.0 | Between Average and Ideal |
| q25 | q25 | 2000.0 | 500.0 | 1000.0 | Between Average and Ideal |
| q26 | q26 | 8.0 | 2.0 | 4.0 | Between Average and Ideal |
| q27 | q27 | 12.0 | 5.0 | 6.0 | Between Average and Ideal |
| q28 | q28 | 10.0 | 20.0 | 20.0 | Between Average and Ideal |
| q29 | q29 | 55.0 | 90.0 | 80.0 | Between Average and Ideal |
| q30 | q30 | 12.0 | 4.0 | 4.0 | Between Average and Ideal |
| q31 | q31 | 50.0 | 13.0 | 25.0 | Between Average and Ideal |
| q32 | q32 | 24.0 | 48.0 | 40.0 | Between Average and Ideal |
| q33 | q33 | 5.5 | 2.5 | 4.0 | Between Average and Ideal |
| q34 | q34 | 50.0 | 10.0 | 50.0 | Between Average and Ideal |
# Count the number of questions in each position category
position_counts_median_no3sd <- table(question_position_summary_median_no3sd$Position)
position_summary_median_no3sd <- data.frame(Position = names(position_counts_median_no3sd), Count = as.numeric(position_counts_median_no3sd))
total_categorized_no3sd <- sum(position_summary_median_no3sd$Count)
position_summary_median_no3sd$Percentage <- ifelse(total_categorized_no3sd > 0, (position_summary_median_no3sd$Count / total_categorized_no3sd) * 100, 0)
position_summary_median_no3sd$Percentage <- round(position_summary_median_no3sd$Percentage, 1)
# Display the position count summary
kable(position_summary_median_no3sd, caption = "Distribution of Position Categories (Median Values, NO 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Position | Count | Percentage |
|---|---|---|
| Average = Ideal | 2 | 6.1 |
| Between Average and Ideal | 30 | 6.1 |
| Beyond Average | 1 | 6.1 |
# Create distribution plot
position_distribution_plot_median_no3sd <- ggplot(position_summary_median_no3sd, aes(x = Position, y = Count, fill = Position)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Count)), vjust = -0.5, size = 4) +
labs(title = "Distribution of Position Categories (Median Values, NO 3SD Removal)",
subtitle = "Where does 'Reasonable' fall relative to 'Average' and 'Ideal'?",
x = "Position Category", y = "Number of Questions") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9, angle = 15, hjust = 1), legend.position = "none")
print(position_distribution_plot_median_no3sd)
ggsave("position_distribution_median_no3sd_removed.png", position_distribution_plot_median_no3sd, width = 10, height = 7, dpi = 300)
# Create scatterplot
plot_data_intermediacy_no3sd <- question_position_summary_median_no3sd %>% filter(!is.na(Average_Value) & !is.na(Ideal_Value) & !is.na(Reasonable_Value))
prop_both_no3sd_fmt <- scales::percent(ifelse(valid_questions_both_no3sd > 0, count_both_sides_median_no3sd/valid_questions_both_no3sd, 0), accuracy=0.1)
intermediacy_plot_median_no3sd <- ggplot(plot_data_intermediacy_no3sd, aes(x = Average_Value, y = Ideal_Value)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", alpha = 0.5) +
geom_point(color = "gray60", size = 3, alpha = 0.6) +
geom_point(aes(x = Average_Value, y = Reasonable_Value, color = Position), size = 4) +
geom_segment(aes(x = Average_Value, xend = Average_Value, y = Ideal_Value, yend = Reasonable_Value, color = Position), linetype = "dotted", size = 0.5, alpha = 0.7) +
scale_color_brewer(palette = "Set1") +
labs(title = "Intermediacy Analysis (Median, NO 3SD Removal): Position of Reasonable",
subtitle = paste0("Between Avg and Ideal: ", count_both_sides_median_no3sd, " of ", valid_questions_both_no3sd, " (", prop_both_no3sd_fmt, ")"),
x = "Median Average Rating (NO 3SD Removal)", y = "Median Rating Value (NO 3SD Removal)", color = "Position Category") +
theme_minimal() + theme(plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9), legend.position = "bottom")
print(intermediacy_plot_median_no3sd)
ggsave("intermediacy_analysis_median_no3sd_removed.png", intermediacy_plot_median_no3sd, width = 10, height = 8, dpi = 300)
# Create rating patterns plot
ratings_long_median_no3sd <- question_position_summary_median_no3sd %>%
select(Question, Average_Value, Ideal_Value, Reasonable_Value, Position) %>%
pivot_longer(cols = c(Average_Value, Ideal_Value, Reasonable_Value), names_to = "Rating_Type", values_to = "Value") %>%
mutate(Rating_Type = factor(gsub("_Value", "", Rating_Type), levels = c("Average", "Reasonable", "Ideal"))) %>%
filter(!is.na(Value))
rating_patterns_plot_median_no3sd <- ggplot(ratings_long_median_no3sd, aes(x = Rating_Type, y = Value, group = Question, color = Position)) +
geom_line(alpha = 0.5) + geom_point(size = 2.5, alpha=0.7) + facet_wrap(~ Position, scales = "free_y") +
scale_color_brewer(palette = "Set1") +
labs(title = "Rating Patterns by Position Category (Median Values, NO 3SD Removal)",
subtitle = "How Average, Ideal, and Reasonable median ratings relate within each category",
x = "Rating Type", y = "Median Value (NO 3SD Removal)") +
theme_minimal() + theme(plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text.x = element_text(size = 9, angle = 45, hjust = 1), axis.text.y = element_text(size = 9), strip.text = element_text(size=10, face="bold"), legend.position = "none")
print(rating_patterns_plot_median_no3sd)
ggsave("rating_patterns_by_position_median_no3sd_removed.png", rating_patterns_plot_median_no3sd, width = 12, height = 8, dpi = 300)
# Store results
analysis_4_results_no3sd <- list(
ideal_side = list(vector=ideal_side_vector_median_no3sd, count=count_ideal_side_median_no3sd, valid_n=valid_questions_ideal_no3sd, binomial_test=binomial_result_ideal_median_no3sd, summary_row=intermediacy_summary_no3sd[1,]),
average_side = list(vector=average_side_vector_median_no3sd, count=count_average_side_median_no3sd, valid_n=valid_questions_avg_no3sd, binomial_test=binomial_result_avg_side_median_no3sd, summary_row=intermediacy_summary_no3sd[2,]),
both_sides = list(vector=both_sides_vector_median_no3sd, count=count_both_sides_median_no3sd, valid_n=valid_questions_both_no3sd, binomial_test=binomial_result_both_sides_median_no3sd, summary_row=intermediacy_summary_no3sd[3,]),
combined_summary = intermediacy_summary_no3sd,
question_position = question_position_summary_median_no3sd,
position_summary = position_summary_median_no3sd
)
Step 1 and Step 2 have been completed in the earlier analysis.
# Function to check if reasonable is on the "ideal side" of average
# Equal median ratings are treated as being on the predicted side
is_ideal_side_median <- function(average, ideal, reasonable) {
if (any(is.na(c(average, ideal, reasonable)))) {
return(NA)
}
# Check if distance to ideal is less than or equal to distance to average
ifelse(abs(reasonable - ideal) <= abs(reasonable - average), 1, 0)
}
# Function to check if reasonable is on the "average side" of ideal
# Equal median ratings are treated as being on the predicted side
is_average_side_median <- function(average, ideal, reasonable) {
if (any(is.na(c(average, ideal, reasonable)))) {
return(NA)
}
# Check if distance to average is less than or equal to distance to ideal
ifelse(abs(reasonable - average) <= abs(reasonable - ideal), 1, 0)
}
# Function to check if reasonable falls between average and ideal (inclusive)
# Handles cases where average == ideal
is_both_sides_median <- function(average, ideal, reasonable) {
if (any(is.na(c(average, ideal, reasonable)))) {
return(NA)
}
# Check if reasonable is numerically between average and ideal, inclusive
# This works regardless of whether average > ideal or ideal > average
ifelse((reasonable >= min(average, ideal)) & (reasonable <= max(average, ideal)), 1, 0)
}
# --- Calculate all vectors ---
ideal_side_vector_median_3sd <- mapply(is_ideal_side_median, column_medians_average_3sd, column_medians_ideal_3sd, column_medians_reasonable_3sd)
average_side_vector_median_3sd <- mapply(is_average_side_median, column_medians_average_3sd, column_medians_ideal_3sd, column_medians_reasonable_3sd)
both_sides_vector_median_3sd <- mapply(is_both_sides_median, column_medians_average_3sd, column_medians_ideal_3sd, column_medians_reasonable_3sd)
# --- Calculate counts and valid Ns ---
count_ideal_side_median_3sd <- sum(ideal_side_vector_median_3sd, na.rm = TRUE)
valid_questions_ideal_3sd <- sum(!is.na(ideal_side_vector_median_3sd))
count_average_side_median_3sd <- sum(average_side_vector_median_3sd, na.rm = TRUE)
valid_questions_avg_3sd <- sum(!is.na(average_side_vector_median_3sd))
count_both_sides_median_3sd <- sum(both_sides_vector_median_3sd, na.rm = TRUE)
valid_questions_both_3sd <- sum(!is.na(both_sides_vector_median_3sd))
# --- Perform binomial tests ---
p_val_ideal_3sd <- NA; binomial_result_ideal_median_3sd <- NULL
if (valid_questions_ideal_3sd > 0) {
binomial_result_ideal_median_3sd <- binom.test(count_ideal_side_median_3sd, valid_questions_ideal_3sd, p = 0.5, alternative = "greater")
p_val_ideal_3sd <- binomial_result_ideal_median_3sd$p.value
}
p_val_avg_3sd <- NA; binomial_result_avg_side_median_3sd <- NULL
if (valid_questions_avg_3sd > 0) {
binomial_result_avg_side_median_3sd <- binom.test(count_average_side_median_3sd, valid_questions_avg_3sd, p = 0.5, alternative = "greater")
p_val_avg_3sd <- binomial_result_avg_side_median_3sd$p.value
}
p_val_both_3sd <- NA; binomial_result_both_sides_median_3sd <- NULL
if (valid_questions_both_3sd > 0) {
binomial_result_both_sides_median_3sd <- binom.test(count_both_sides_median_3sd, valid_questions_both_3sd, p = 1/3, alternative = "two.sided")
p_val_both_3sd <- binomial_result_both_sides_median_3sd$p.value
}
# --- Create ONE combined summary table ---
intermediacy_summary_3sd <- data.frame(
Test = c("On Ideal Side of Average", "On Average Side of Ideal", "Between Average and Ideal"),
Count = c(count_ideal_side_median_3sd, count_average_side_median_3sd, count_both_sides_median_3sd),
Total_Valid = c(valid_questions_ideal_3sd, valid_questions_avg_3sd, valid_questions_both_3sd),
Proportion = c(ifelse(valid_questions_ideal_3sd > 0, count_ideal_side_median_3sd / valid_questions_ideal_3sd, NA),
ifelse(valid_questions_avg_3sd > 0, count_average_side_median_3sd / valid_questions_avg_3sd, NA),
ifelse(valid_questions_both_3sd > 0, count_both_sides_median_3sd / valid_questions_both_3sd, NA)),
Expected_Prop = c(0.5, 0.5, 1/3),
p_value = c(p_val_ideal_3sd, p_val_avg_3sd, p_val_both_3sd)
)
# Format proportions and p-values
intermediacy_summary_3sd$Proportion <- scales::percent(intermediacy_summary_3sd$Proportion, accuracy = 0.1)
intermediacy_summary_3sd$Expected_Prop <- scales::percent(intermediacy_summary_3sd$Expected_Prop, accuracy = 0.1)
intermediacy_summary_3sd$p_value <- sapply(intermediacy_summary_3sd$p_value, format_pvalue)
# Display the summary table
kable(intermediacy_summary_3sd[,c("Test", "Count", "Total_Valid", "Proportion", "Expected_Prop", "p_value")],
caption = "Intermediacy Binomial Tests (WITH 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Test | Count | Total_Valid | Proportion | Expected_Prop | p_value |
|---|---|---|---|---|---|
| On Ideal Side of Average | 29 | 33 | 87.9% | 50.0% | < .001 |
| On Average Side of Ideal | 8 | 33 | 24.2% | 50.0% | .999 |
| Between Average and Ideal | 32 | 33 | 97.0% | 33.3% | < .001 |
# Create a table showing the results for each question
question_position_summary_median_3sd <- data.frame(
Question = names(column_medians_average_3sd),
Average_Value = column_medians_average_3sd,
Ideal_Value = column_medians_ideal_3sd,
Reasonable_Value = column_medians_reasonable_3sd,
On_Ideal_Side = ideal_side_vector_median_3sd,
On_Average_Side = average_side_vector_median_3sd,
Between_Average_And_Ideal = both_sides_vector_median_3sd
)
# Handle NAs and Categorize
question_position_summary_median_3sd <- question_position_summary_median_3sd %>%
mutate(
On_Ideal_Side = ifelse(is.na(On_Ideal_Side), 0, On_Ideal_Side),
On_Average_Side = ifelse(is.na(On_Average_Side), 0, On_Average_Side),
Between_Average_And_Ideal = ifelse(is.na(Between_Average_And_Ideal), 0, Between_Average_And_Ideal)
)
question_position_summary_median_3sd$Position <- "Other/NA"
question_position_summary_median_3sd$Position[question_position_summary_median_3sd$Between_Average_And_Ideal == 1] <- "Between Average and Ideal"
question_position_summary_median_3sd$Position[question_position_summary_median_3sd$Between_Average_And_Ideal == 0 & question_position_summary_median_3sd$On_Ideal_Side == 1 & question_position_summary_median_3sd$On_Average_Side == 0] <- "Beyond Ideal"
question_position_summary_median_3sd$Position[question_position_summary_median_3sd$Between_Average_And_Ideal == 0 & question_position_summary_median_3sd$On_Average_Side == 1 & question_position_summary_median_3sd$On_Ideal_Side == 0] <- "Beyond Average"
question_position_summary_median_3sd$Position[
!is.na(question_position_summary_median_3sd$Average_Value) &
!is.na(question_position_summary_median_3sd$Ideal_Value) &
question_position_summary_median_3sd$Average_Value == question_position_summary_median_3sd$Ideal_Value] <- "Average = Ideal"
# Display the question position summary
kable(question_position_summary_median_3sd[, c("Question", "Average_Value", "Ideal_Value", "Reasonable_Value", "Position")],
caption = "Position of Median Reasonable Rating (WITH 3SD Removal) Relative to Average and Ideal") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Question | Average_Value | Ideal_Value | Reasonable_Value | Position | |
|---|---|---|---|---|---|
| q1 | q1 | 3.0 | 2 | 2 | Between Average and Ideal |
| q2 | q2 | 7.0 | 2 | 3 | Between Average and Ideal |
| q3 | q3 | 3.0 | 6 | 5 | Between Average and Ideal |
| q4 | q4 | 2000.0 | 2000 | 2000 | Average = Ideal |
| q5 | q5 | 20.0 | 30 | 30 | Between Average and Ideal |
| q6 | q6 | 10.0 | 1 | 3 | Between Average and Ideal |
| q7 | q7 | 15.0 | 5 | 10 | Between Average and Ideal |
| q8 | q8 | 5.0 | 12 | 10 | Between Average and Ideal |
| q9 | q9 | 5.0 | 3 | 5 | Between Average and Ideal |
| q10 | q10 | 5.0 | 0 | 2 | Between Average and Ideal |
| q11 | q11 | 500.0 | 0 | 7 | Between Average and Ideal |
| q12 | q12 | 30.0 | 0 | 10 | Between Average and Ideal |
| q13 | q13 | 40.0 | 10 | 15 | Between Average and Ideal |
| q14 | q14 | 10.0 | 2 | 5 | Between Average and Ideal |
| q15 | q15 | 8.0 | 10 | 8 | Between Average and Ideal |
| q16 | q16 | 4.0 | 5 | 4 | Between Average and Ideal |
| q17 | q17 | 3.0 | 0 | 1 | Between Average and Ideal |
| q18 | q18 | 10.0 | 1 | 5 | Between Average and Ideal |
| q20 | q20 | 20.0 | 0 | 2 | Between Average and Ideal |
| q21 | q21 | 8.0 | 5 | 5 | Between Average and Ideal |
| q22 | q22 | 7.0 | 7 | 7 | Average = Ideal |
| q23 | q23 | 2.0 | 2 | 3 | Average = Ideal |
| q24 | q24 | 15.0 | 24 | 24 | Between Average and Ideal |
| q25 | q25 | 2000.0 | 500 | 1000 | Between Average and Ideal |
| q26 | q26 | 8.0 | 2 | 4 | Between Average and Ideal |
| q27 | q27 | 11.5 | 4 | 6 | Between Average and Ideal |
| q28 | q28 | 10.0 | 20 | 20 | Between Average and Ideal |
| q29 | q29 | 54.5 | 90 | 80 | Between Average and Ideal |
| q30 | q30 | 12.0 | 3 | 4 | Between Average and Ideal |
| q31 | q31 | 50.0 | 10 | 20 | Between Average and Ideal |
| q32 | q32 | 24.0 | 48 | 40 | Between Average and Ideal |
| q33 | q33 | 5.0 | 2 | 3 | Between Average and Ideal |
| q34 | q34 | 50.0 | 5 | 50 | Between Average and Ideal |
# Count the number of questions in each position category
position_counts_median_3sd <- table(question_position_summary_median_3sd$Position)
position_summary_median_3sd <- data.frame(Position = names(position_counts_median_3sd), Count = as.numeric(position_counts_median_3sd))
total_categorized_3sd <- sum(position_summary_median_3sd$Count)
position_summary_median_3sd$Percentage <- ifelse(total_categorized_3sd > 0, (position_summary_median_3sd$Count / total_categorized_3sd) * 100, 0)
position_summary_median_3sd$Percentage <- round(position_summary_median_3sd$Percentage, 1)
# Display the position count summary
kable(position_summary_median_3sd, caption = "Distribution of Position Categories (Median Values, WITH 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
| Position | Count | Percentage |
|---|---|---|
| Average = Ideal | 3 | 9.1 |
| Between Average and Ideal | 30 | 9.1 |
# Create distribution plot
position_distribution_plot_median_3sd <- ggplot(position_summary_median_3sd, aes(x = Position, y = Count, fill = Position)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(Count)), vjust = -0.5, size = 4) +
labs(title = "Distribution of Position Categories (Median Values, WITH 3SD Removal)",
subtitle = "Where does 'Reasonable' fall relative to 'Average' and 'Ideal'?",
x = "Position Category", y = "Number of Questions") +
theme_minimal() + theme( plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9, angle = 15, hjust = 1), legend.position = "none")
print(position_distribution_plot_median_3sd)
ggsave("position_distribution_median_with_3sd_removed.png", position_distribution_plot_median_3sd, width = 10, height = 7, dpi = 300)
# Create scatterplot
plot_data_intermediacy_3sd <- question_position_summary_median_3sd %>% filter(!is.na(Average_Value) & !is.na(Ideal_Value) & !is.na(Reasonable_Value))
prop_both_3sd_fmt <- scales::percent(ifelse(valid_questions_both_3sd > 0, count_both_sides_median_3sd/valid_questions_both_3sd, 0), accuracy=0.1)
intermediacy_plot_median_3sd <- ggplot(plot_data_intermediacy_3sd, aes(x = Average_Value, y = Ideal_Value)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", alpha = 0.5) +
geom_point(color = "gray60", size = 3, alpha = 0.6) +
geom_point(aes(x = Average_Value, y = Reasonable_Value, color = Position), size = 4) +
geom_segment(aes(x = Average_Value, xend = Average_Value, y = Ideal_Value, yend = Reasonable_Value, color = Position), linetype = "dotted", size = 0.5, alpha = 0.7) +
scale_color_brewer(palette = "Set1") +
labs(title = "Intermediacy Analysis (Median, WITH 3SD Removal): Position of Reasonable",
subtitle = paste0("Between Avg and Ideal: ", count_both_sides_median_3sd, " of ", valid_questions_both_3sd, " (", prop_both_3sd_fmt, ")"),
x = "Median Average Rating (WITH 3SD Removal)", y = "Median Rating Value (WITH 3SD Removal)", color = "Position Category") +
theme_minimal() + theme(plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text = element_text(size = 9), legend.position = "bottom")
print(intermediacy_plot_median_3sd)
ggsave("intermediacy_analysis_median_with_3sd_removed.png", intermediacy_plot_median_3sd, width = 10, height = 8, dpi = 300)
# Create rating patterns plot
ratings_long_median_3sd <- question_position_summary_median_3sd %>%
select(Question, Average_Value, Ideal_Value, Reasonable_Value, Position) %>%
pivot_longer(cols = c(Average_Value, Ideal_Value, Reasonable_Value), names_to = "Rating_Type", values_to = "Value") %>%
mutate(Rating_Type = factor(gsub("_Value", "", Rating_Type), levels = c("Average", "Reasonable", "Ideal"))) %>%
filter(!is.na(Value))
rating_patterns_plot_median_3sd <- ggplot(ratings_long_median_3sd, aes(x = Rating_Type, y = Value, group = Question, color = Position)) +
geom_line(alpha = 0.5) + geom_point(size = 2.5, alpha=0.7) + facet_wrap(~ Position, scales = "free_y") +
scale_color_brewer(palette = "Set1") +
labs(title = "Rating Patterns by Position Category (Median Values, WITH 3SD Removal)",
subtitle = "How Average, Ideal, and Reasonable median ratings relate within each category",
x = "Rating Type", y = "Median Value (WITH 3SD Removal)") +
theme_minimal() + theme(plot.title = element_text(size = 12, face = "bold"), plot.subtitle = element_text(size = 11), axis.title = element_text(size = 10), axis.text.x = element_text(size = 9, angle = 45, hjust = 1), axis.text.y = element_text(size = 9), strip.text = element_text(size=10, face="bold"), legend.position = "none")
print(rating_patterns_plot_median_3sd)
ggsave("rating_patterns_by_position_median_with_3sd_removed.png", rating_patterns_plot_median_3sd, width = 12, height = 8, dpi = 300)
# Store results
analysis_4_results_3sd <- list(
ideal_side = list(vector=ideal_side_vector_median_3sd, count=count_ideal_side_median_3sd, valid_n=valid_questions_ideal_3sd, binomial_test=binomial_result_ideal_median_3sd, summary_row=intermediacy_summary_3sd[1,]),
average_side = list(vector=average_side_vector_median_3sd, count=count_average_side_median_3sd, valid_n=valid_questions_avg_3sd, binomial_test=binomial_result_avg_side_median_3sd, summary_row=intermediacy_summary_3sd[2,]),
both_sides = list(vector=both_sides_vector_median_3sd, count=count_both_sides_median_3sd, valid_n=valid_questions_both_3sd, binomial_test=binomial_result_both_sides_median_3sd, summary_row=intermediacy_summary_3sd[3,]),
combined_summary = intermediacy_summary_3sd,
question_position = question_position_summary_median_3sd,
position_summary = position_summary_median_3sd
)
# Get unique country names
countries <- unique(Data_All$Country_name)
print(countries)
## [1] "USA" "Colombia" "Brazil" "Germany" "India"
## [6] "Lithuania" "Italy" "Poland" "Spain" "Netherlands"
# Create storage
country_results_median_no3sd_smallval <- list()
# Function to analyze median data for a single country (NO 3SD, Small Constant Log)
analyze_country_median_no3sd_smallval <- function(country, data_avg, data_ideal, data_reas, q_cols) {
cat("\n========== Analyzing Median Data (NO 3SD, Log Small Const) for:", country, "==========\n")
# Filter data by country AND apply attention check (using _Pass1 data)
country_average_pass <- data_avg %>% filter(Country_name == country, q19 == 15)
country_ideal_pass <- data_ideal %>% filter(Country_name == country, q19 == 15)
country_reasonable_pass <- data_reas %>% filter(Country_name == country, q19 == 15)
n_avg <- nrow(country_average_pass)
n_ideal <- nrow(country_ideal_pass)
n_reas <- nrow(country_reasonable_pass)
cat("Participants after attention check:", n_avg, n_ideal, n_reas, "\n")
if(n_avg < 1 || n_ideal < 1 || n_reas < 1) {
cat("Insufficient participants for", country, "\n"); return(NULL)
}
convert_to_numeric <- function(df, cols) { df %>% mutate(across(all_of(cols), ~ suppressWarnings(as.numeric(as.character(.))))) }
country_average_pass <- convert_to_numeric(country_average_pass, q_cols)
country_ideal_pass <- convert_to_numeric(country_ideal_pass, q_cols)
country_reasonable_pass <- convert_to_numeric(country_reasonable_pass, q_cols)
# Calculate medians
medians_avg <- if(n_avg > 0) sapply(country_average_pass[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_ideal <- if(n_ideal > 0) sapply(country_ideal_pass[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_reas <- if(n_reas > 0) sapply(country_reasonable_pass[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
names(medians_avg) <- q_cols; names(medians_ideal) <- q_cols; names(medians_reas) <- q_cols
# Convert to log scale (Small Constant)
log_medians_avg <- log_smallvalue(medians_avg)
log_medians_ideal <- log_smallvalue(medians_ideal)
log_medians_reas <- log_smallvalue(medians_reas)
# Data frame for regression
data_for_aic <- data.frame(Reasonable = log_medians_reas, Average = log_medians_avg, Ideal = log_medians_ideal)
data_for_aic_clean <- data_for_aic %>% filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check for sufficient data points for regression
if(nrow(data_for_aic_clean) < 5) {
cat("Too few valid data points for regression analysis in", country, "\n")
return(list(country=country, sample_sizes=list(after_attention=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_summary=NULL, best_model="Insufficient Data"))
}
# Run Regressions
model1 <- lm(Reasonable ~ Average, data = data_for_aic_clean)
aic1 <- AIC(model1); r2_1 <- summary(model1)$r.squared
model2 <- lm(Reasonable ~ Ideal, data = data_for_aic_clean)
aic2 <- AIC(model2); r2_2 <- summary(model2)$r.squared
model3 <- lm(Reasonable ~ Average + Ideal, data = data_for_aic_clean)
aic3 <- AIC(model3); r2_3 <- summary(model3)$r.squared
# Regression summary
reg_summary <- data.frame(Model=c("Avg", "Ideal", "Both"), AIC=c(aic1, aic2, aic3), R_squared=c(r2_1, r2_2, r2_3))
best_model_idx <- which.min(c(aic1, aic2, aic3))
best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
cat("Best model (NO 3SD, Log Small Const):", best_model_name, "\n")
return(list(country=country, sample_sizes=list(after_attention=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_models=list(model1=model1, model2=model2, model3=model3), regression_summary=reg_summary, aic_values=c(Model1=aic1, Model2=aic2, Model3=aic3), best_model=best_model_name))
}
# Run analysis for each country
for(country in countries) {
country_results_median_no3sd_smallval[[country]] <- analyze_country_median_no3sd_smallval(country, Data_All_Average_Pass1, Data_All_Ideal_Pass1, Data_All_Reasonable_Pass1, question_cols)
}
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: USA ==========
## Participants after attention check: 73 72 69
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Colombia ==========
## Participants after attention check: 89 84 81
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Brazil ==========
## Participants after attention check: 43 56 52
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Germany ==========
## Participants after attention check: 71 72 72
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: India ==========
## Participants after attention check: 74 71 56
## Best model (NO 3SD, Log Small Const): Ideal
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Lithuania ==========
## Participants after attention check: 76 76 77
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Italy ==========
## Participants after attention check: 73 83 75
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Poland ==========
## Participants after attention check: 90 87 90
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Spain ==========
## Participants after attention check: 73 69 72
## Best model (NO 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log Small Const) for: Netherlands ==========
## Participants after attention check: 131 130 114
## Best model (NO 3SD, Log Small Const): Hybrid
# Extract AIC summary
country_aic_summary_median_no3sd_smallval <- data.frame( Country = character(), AIC_Average = numeric(), AIC_Ideal = numeric(), AIC_Hybrid = numeric(), R2_Average = numeric(), R2_Ideal = numeric(), R2_Hybrid = numeric(), Best_Model = character(), N_Avg = integer(), N_Ideal = integer(), N_Reas = integer(), stringsAsFactors = FALSE )
for(country in countries) {
res <- country_results_median_no3sd_smallval[[country]]
if(!is.null(res)) {
country_aic_summary_median_no3sd_smallval <- rbind(country_aic_summary_median_no3sd_smallval, data.frame( Country = country, AIC_Average = ifelse(!is.null(res$aic_values), res$aic_values["Model1"], NA), AIC_Ideal = ifelse(!is.null(res$aic_values), res$aic_values["Model2"], NA), AIC_Hybrid = ifelse(!is.null(res$aic_values), res$aic_values["Model3"], NA), R2_Average = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[1], NA), R2_Ideal = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[2], NA), R2_Hybrid = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[3], NA), Best_Model = res$best_model, N_Avg = res$sample_sizes$after_attention[1], N_Ideal = res$sample_sizes$after_attention[2], N_Reas = res$sample_sizes$after_attention[3], stringsAsFactors = FALSE))
}
}
# Display summary table - THIS WILL RENDER CORRECTLY IN KNITTED HTML
kable(country_aic_summary_median_no3sd_smallval, caption = "AIC/R² by Country (Median, NO 3SD, Log Small Constant)", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(8, background = case_when( is.na(country_aic_summary_median_no3sd_smallval$Best_Model) ~ "white", country_aic_summary_median_no3sd_smallval$Best_Model == "Hybrid" ~ "#e6f7ff", country_aic_summary_median_no3sd_smallval$Best_Model == "Average" ~ "#e6ffe6", country_aic_summary_median_no3sd_smallval$Best_Model == "Ideal" ~ "#ffe6e6", TRUE ~ "#f2f2f2" ))
| Country | AIC_Average | AIC_Ideal | AIC_Hybrid | R2_Average | R2_Ideal | R2_Hybrid | Best_Model | N_Avg | N_Ideal | N_Reas |
|---|---|---|---|---|---|---|---|---|---|---|
| USA | 77.422 | 115.073 | 45.067 | 0.799 | 0.372 | 0.929 | Hybrid | 73 | 72 | 69 |
| Colombia | 59.425 | 65.683 | 25.018 | 0.877 | 0.852 | 0.959 | Hybrid | 89 | 84 | 81 |
| Brazil | 72.292 | 118.221 | 61.027 | 0.817 | 0.263 | 0.877 | Hybrid | 43 | 56 | 52 |
| Germany | 70.116 | 117.835 | 47.873 | 0.846 | 0.348 | 0.926 | Hybrid | 71 | 72 | 72 |
| India | 34.665 | 26.444 | 28.339 | 0.725 | 0.786 | 0.786 | Ideal | 74 | 71 | 56 |
| Lithuania | 80.975 | 123.127 | 57.232 | 0.843 | 0.437 | 0.928 | Hybrid | 76 | 76 | 77 |
| Italy | 135.084 | 128.639 | 117.402 | 0.321 | 0.441 | 0.626 | Hybrid | 73 | 83 | 75 |
| Poland | 154.103 | 140.355 | 134.872 | 0.276 | 0.523 | 0.620 | Hybrid | 90 | 87 | 90 |
| Spain | 72.224 | 120.854 | 46.453 | 0.825 | 0.237 | 0.925 | Hybrid | 73 | 69 | 72 |
| Netherlands | 71.075 | 114.339 | 25.813 | 0.858 | 0.472 | 0.966 | Hybrid | 131 | 130 | 114 |
# Create storage
country_results_median_no3sd_plusone <- list()
# Function to analyze median data for a single country (NO 3SD, Plus One Log)
analyze_country_median_no3sd_plusone <- function(country, data_avg, data_ideal, data_reas, q_cols) {
cat("\n========== Analyzing Median Data (NO 3SD, Log+1) for:", country, "==========\n")
# Filter data by country AND apply attention check (using _Pass1 data)
country_average_pass <- data_avg %>% filter(Country_name == country, q19 == 15)
country_ideal_pass <- data_ideal %>% filter(Country_name == country, q19 == 15)
country_reasonable_pass <- data_reas %>% filter(Country_name == country, q19 == 15)
n_avg <- nrow(country_average_pass)
n_ideal <- nrow(country_ideal_pass)
n_reas <- nrow(country_reasonable_pass)
cat("Participants after attention check:", n_avg, n_ideal, n_reas, "\n")
if(n_avg < 1 || n_ideal < 1 || n_reas < 1) {
cat("Insufficient participants for", country, "\n"); return(NULL)
}
convert_to_numeric <- function(df, cols) { df %>% mutate(across(all_of(cols), ~ suppressWarnings(as.numeric(as.character(.))))) }
country_average_pass <- convert_to_numeric(country_average_pass, q_cols)
country_ideal_pass <- convert_to_numeric(country_ideal_pass, q_cols)
country_reasonable_pass <- convert_to_numeric(country_reasonable_pass, q_cols)
# Calculate medians
medians_avg <- if(n_avg > 0) sapply(country_average_pass[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_ideal <- if(n_ideal > 0) sapply(country_ideal_pass[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_reas <- if(n_reas > 0) sapply(country_reasonable_pass[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
names(medians_avg) <- q_cols; names(medians_ideal) <- q_cols; names(medians_reas) <- q_cols
# Convert to log scale (Plus One)
log_medians_avg <- log_plus_one(medians_avg)
log_medians_ideal <- log_plus_one(medians_ideal)
log_medians_reas <- log_plus_one(medians_reas)
# Data frame for regression
data_for_aic <- data.frame(Reasonable = log_medians_reas, Average = log_medians_avg, Ideal = log_medians_ideal)
data_for_aic_clean <- data_for_aic %>% filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check for sufficient data points for regression
if(nrow(data_for_aic_clean) < 5) {
cat("Too few valid data points for regression analysis in", country, "\n")
return(list(country=country, sample_sizes=list(after_attention=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_summary=NULL, best_model="Insufficient Data"))
}
# Run Regressions
model1 <- lm(Reasonable ~ Average, data = data_for_aic_clean)
aic1 <- AIC(model1); r2_1 <- summary(model1)$r.squared
model2 <- lm(Reasonable ~ Ideal, data = data_for_aic_clean)
aic2 <- AIC(model2); r2_2 <- summary(model2)$r.squared
model3 <- lm(Reasonable ~ Average + Ideal, data = data_for_aic_clean)
aic3 <- AIC(model3); r2_3 <- summary(model3)$r.squared
# Regression summary
reg_summary <- data.frame(Model=c("Avg", "Ideal", "Both"), AIC=c(aic1, aic2, aic3), R_squared=c(r2_1, r2_2, r2_3))
best_model_idx <- which.min(c(aic1, aic2, aic3))
best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
cat("Best model (NO 3SD, Log+1):", best_model_name, "\n") # Keep this text output if desired
return(list(country=country, sample_sizes=list(after_attention=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_models=list(model1=model1, model2=model2, model3=model3), regression_summary=reg_summary, aic_values=c(Model1=aic1, Model2=aic2, Model3=aic3), best_model=best_model_name))
}
# Run analysis for each country
for(country in countries) {
country_results_median_no3sd_plusone[[country]] <- analyze_country_median_no3sd_plusone(country, Data_All_Average_Pass1, Data_All_Ideal_Pass1, Data_All_Reasonable_Pass1, question_cols)
}
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: USA ==========
## Participants after attention check: 73 72 69
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Colombia ==========
## Participants after attention check: 89 84 81
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Brazil ==========
## Participants after attention check: 43 56 52
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Germany ==========
## Participants after attention check: 71 72 72
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: India ==========
## Participants after attention check: 74 71 56
## Best model (NO 3SD, Log+1): Ideal
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Lithuania ==========
## Participants after attention check: 76 76 77
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Italy ==========
## Participants after attention check: 73 83 75
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Poland ==========
## Participants after attention check: 90 87 90
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Spain ==========
## Participants after attention check: 73 69 72
## Best model (NO 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (NO 3SD, Log+1) for: Netherlands ==========
## Participants after attention check: 131 130 114
## Best model (NO 3SD, Log+1): Hybrid
# Extract AIC summary
country_aic_summary_median_no3sd_plusone <- data.frame( Country = character(), AIC_Average = numeric(), AIC_Ideal = numeric(), AIC_Hybrid = numeric(), R2_Average = numeric(), R2_Ideal = numeric(), R2_Hybrid = numeric(), Best_Model = character(), N_Avg = integer(), N_Ideal = integer(), N_Reas = integer(), stringsAsFactors = FALSE )
for(country in countries) {
res <- country_results_median_no3sd_plusone[[country]]
if(!is.null(res)) {
country_aic_summary_median_no3sd_plusone <- rbind(country_aic_summary_median_no3sd_plusone, data.frame( Country = country, AIC_Average = ifelse(!is.null(res$aic_values), res$aic_values["Model1"], NA), AIC_Ideal = ifelse(!is.null(res$aic_values), res$aic_values["Model2"], NA), AIC_Hybrid = ifelse(!is.null(res$aic_values), res$aic_values["Model3"], NA), R2_Average = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[1], NA), R2_Ideal = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[2], NA), R2_Hybrid = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[3], NA), Best_Model = res$best_model, N_Avg = res$sample_sizes$after_attention[1], N_Ideal = res$sample_sizes$after_attention[2], N_Reas = res$sample_sizes$after_attention[3], stringsAsFactors = FALSE))
}
}
# Display summary table - THIS WILL RENDER CORRECTLY IN KNITTED HTML
kable(country_aic_summary_median_no3sd_plusone, caption = "AIC/R² by Country (Median, NO 3SD, Log Plus One)", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(8, background = case_when( is.na(country_aic_summary_median_no3sd_plusone$Best_Model) ~ "white", country_aic_summary_median_no3sd_plusone$Best_Model == "Hybrid" ~ "#e6f7ff", country_aic_summary_median_no3sd_plusone$Best_Model == "Average" ~ "#e6ffe6", country_aic_summary_median_no3sd_plusone$Best_Model == "Ideal" ~ "#ffe6e6", TRUE ~ "#f2f2f2" ))
| Country | AIC_Average | AIC_Ideal | AIC_Hybrid | R2_Average | R2_Ideal | R2_Hybrid | Best_Model | N_Avg | N_Ideal | N_Reas |
|---|---|---|---|---|---|---|---|---|---|---|
| USA | 71.341 | 96.354 | 31.173 | 0.809 | 0.592 | 0.947 | Hybrid | 73 | 72 | 69 |
| Colombia | 52.671 | 50.030 | 11.213 | 0.888 | 0.896 | 0.970 | Hybrid | 89 | 84 | 81 |
| Brazil | 65.885 | 91.728 | 42.834 | 0.832 | 0.633 | 0.921 | Hybrid | 43 | 56 | 52 |
| Germany | 60.639 | 89.373 | 29.107 | 0.868 | 0.684 | 0.952 | Hybrid | 71 | 72 | 72 |
| India | 27.616 | 18.286 | 20.283 | 0.718 | 0.787 | 0.787 | Ideal | 74 | 71 | 56 |
| Lithuania | 76.326 | 83.534 | 29.576 | 0.845 | 0.807 | 0.965 | Hybrid | 76 | 76 | 77 |
| Italy | 108.514 | 90.092 | 79.292 | 0.486 | 0.706 | 0.801 | Hybrid | 73 | 83 | 75 |
| Poland | 108.270 | 92.209 | 75.961 | 0.606 | 0.758 | 0.861 | Hybrid | 90 | 87 | 90 |
| Spain | 65.335 | 101.746 | 35.004 | 0.840 | 0.519 | 0.940 | Hybrid | 73 | 69 | 72 |
| Netherlands | 60.443 | 79.246 | -3.602 | 0.873 | 0.776 | 0.983 | Hybrid | 131 | 130 | 114 |
# Create storage
country_results_median_3sd_smallval <- list()
# Function to analyze median data for a single country (WITH 3SD, Small Constant Log)
analyze_country_median_3sd_smallval <- function(country, data_avg_filtered, data_ideal_filtered, data_reas_filtered, q_cols) {
cat("\n========== Analyzing Median Data (WITH 3SD, Log Small Const) for:", country, "==========\n")
# Filter data by country (using _Filtered data)
country_average_final <- data_avg_filtered %>% filter(Country_name == country)
country_ideal_final <- data_ideal_filtered %>% filter(Country_name == country)
country_reasonable_final <- data_reas_filtered %>% filter(Country_name == country)
n_avg <- nrow(country_average_final)
n_ideal <- nrow(country_ideal_final)
n_reas <- nrow(country_reasonable_final)
cat("Participants after ALL filters:", n_avg, n_ideal, n_reas, "\n")
if(n_avg < 1 || n_ideal < 1 || n_reas < 1) {
cat("Insufficient participants for", country, "\n"); return(NULL)
}
# Columns should already be numeric from section 3, but double-check
convert_to_numeric <- function(df, cols) { df %>% mutate(across(all_of(cols), ~ suppressWarnings(as.numeric(as.character(.))))) }
country_average_final <- convert_to_numeric(country_average_final, q_cols)
country_ideal_final <- convert_to_numeric(country_ideal_final, q_cols)
country_reasonable_final <- convert_to_numeric(country_reasonable_final, q_cols)
# Calculate medians
medians_avg <- if(n_avg > 0) sapply(country_average_final[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_ideal <- if(n_ideal > 0) sapply(country_ideal_final[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_reas <- if(n_reas > 0) sapply(country_reasonable_final[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
names(medians_avg) <- q_cols; names(medians_ideal) <- q_cols; names(medians_reas) <- q_cols
# Convert to log scale (Small Constant)
log_medians_avg <- log_smallvalue(medians_avg)
log_medians_ideal <- log_smallvalue(medians_ideal)
log_medians_reas <- log_smallvalue(medians_reas)
# Data frame for regression
data_for_aic <- data.frame(Reasonable = log_medians_reas, Average = log_medians_avg, Ideal = log_medians_ideal)
data_for_aic_clean <- data_for_aic %>% filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check for sufficient data points for regression
if(nrow(data_for_aic_clean) < 5) {
cat("Too few valid data points for regression analysis in", country, "\n")
return(list(country=country, sample_sizes=list(after_all_filters=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_summary=NULL, best_model="Insufficient Data"))
}
# Run Regressions
model1 <- lm(Reasonable ~ Average, data = data_for_aic_clean)
aic1 <- AIC(model1); r2_1 <- summary(model1)$r.squared
model2 <- lm(Reasonable ~ Ideal, data = data_for_aic_clean)
aic2 <- AIC(model2); r2_2 <- summary(model2)$r.squared
model3 <- lm(Reasonable ~ Average + Ideal, data = data_for_aic_clean)
aic3 <- AIC(model3); r2_3 <- summary(model3)$r.squared
# Regression summary
reg_summary <- data.frame(Model=c("Avg", "Ideal", "Both"), AIC=c(aic1, aic2, aic3), R_squared=c(r2_1, r2_2, r2_3))
best_model_idx <- which.min(c(aic1, aic2, aic3))
best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
cat("Best model (WITH 3SD, Log Small Const):", best_model_name, "\n") # Keep this text output if desired
return(list(country=country, sample_sizes=list(after_all_filters=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_models=list(model1=model1, model2=model2, model3=model3), regression_summary=reg_summary, aic_values=c(Model1=aic1, Model2=aic2, Model3=aic3), best_model=best_model_name))
}
# Run analysis for each country
for(country in countries) {
country_results_median_3sd_smallval[[country]] <- analyze_country_median_3sd_smallval(country, Data_All_Average_Filtered, Data_All_Ideal_Filtered, Data_All_Reasonable_Filtered, question_cols)
}
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: USA ==========
## Participants after ALL filters: 62 54 59
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Colombia ==========
## Participants after ALL filters: 64 60 62
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Brazil ==========
## Participants after ALL filters: 30 36 33
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Germany ==========
## Participants after ALL filters: 51 50 58
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: India ==========
## Participants after ALL filters: 46 45 32
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Lithuania ==========
## Participants after ALL filters: 69 58 66
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Italy ==========
## Participants after ALL filters: 53 70 63
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Poland ==========
## Participants after ALL filters: 81 65 83
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Spain ==========
## Participants after ALL filters: 61 57 58
## Best model (WITH 3SD, Log Small Const): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log Small Const) for: Netherlands ==========
## Participants after ALL filters: 92 113 76
## Best model (WITH 3SD, Log Small Const): Hybrid
# Extract AIC summary
country_aic_summary_median_3sd_smallval <- data.frame( Country = character(), AIC_Average = numeric(), AIC_Ideal = numeric(), AIC_Hybrid = numeric(), R2_Average = numeric(), R2_Ideal = numeric(), R2_Hybrid = numeric(), Best_Model = character(), N_Avg = integer(), N_Ideal = integer(), N_Reas = integer(), stringsAsFactors = FALSE )
for(country in countries) {
res <- country_results_median_3sd_smallval[[country]]
if(!is.null(res)) {
country_aic_summary_median_3sd_smallval <- rbind(country_aic_summary_median_3sd_smallval, data.frame( Country = country, AIC_Average = ifelse(!is.null(res$aic_values), res$aic_values["Model1"], NA), AIC_Ideal = ifelse(!is.null(res$aic_values), res$aic_values["Model2"], NA), AIC_Hybrid = ifelse(!is.null(res$aic_values), res$aic_values["Model3"], NA), R2_Average = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[1], NA), R2_Ideal = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[2], NA), R2_Hybrid = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[3], NA), Best_Model = res$best_model, N_Avg = res$sample_sizes$after_all_filters[1], N_Ideal = res$sample_sizes$after_all_filters[2], N_Reas = res$sample_sizes$after_all_filters[3], stringsAsFactors = FALSE))
}
}
# Display summary table - THIS WILL RENDER CORRECTLY IN KNITTED HTML
kable(country_aic_summary_median_3sd_smallval, caption = "AIC/R² by Country (Median, WITH 3SD, Log Small Constant)", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(8, background = case_when( is.na(country_aic_summary_median_3sd_smallval$Best_Model) ~ "white", country_aic_summary_median_3sd_smallval$Best_Model == "Hybrid" ~ "#e6f7ff", country_aic_summary_median_3sd_smallval$Best_Model == "Average" ~ "#e6ffe6", country_aic_summary_median_3sd_smallval$Best_Model == "Ideal" ~ "#ffe6e6", TRUE ~ "#f2f2f2" ))
| Country | AIC_Average | AIC_Ideal | AIC_Hybrid | R2_Average | R2_Ideal | R2_Hybrid | Best_Model | N_Avg | N_Ideal | N_Reas |
|---|---|---|---|---|---|---|---|---|---|---|
| USA | 85.844 | 112.697 | 52.644 | 0.743 | 0.420 | 0.912 | Hybrid | 62 | 54 | 59 |
| Colombia | 54.749 | 93.865 | 45.604 | 0.891 | 0.643 | 0.922 | Hybrid | 64 | 60 | 62 |
| Brazil | 75.125 | 117.416 | 64.502 | 0.794 | 0.259 | 0.860 | Hybrid | 30 | 36 | 33 |
| Germany | 76.723 | 116.831 | 50.279 | 0.815 | 0.378 | 0.922 | Hybrid | 51 | 50 | 58 |
| India | 40.269 | 13.551 | 9.429 | 0.511 | 0.782 | 0.819 | Hybrid | 46 | 45 | 32 |
| Lithuania | 86.177 | 121.409 | 58.343 | 0.816 | 0.466 | 0.926 | Hybrid | 69 | 58 | 66 |
| Italy | 151.767 | 135.116 | 134.692 | 0.120 | 0.469 | 0.506 | Hybrid | 53 | 70 | 63 |
| Poland | 153.461 | 141.881 | 136.687 | 0.286 | 0.497 | 0.596 | Hybrid | 81 | 65 | 83 |
| Spain | 76.557 | 121.425 | 52.492 | 0.803 | 0.233 | 0.911 | Hybrid | 61 | 57 | 58 |
| Netherlands | 103.249 | 121.498 | 76.198 | 0.730 | 0.531 | 0.888 | Hybrid | 92 | 113 | 76 |
# Create storage
country_results_median_3sd_plusone <- list()
# Function to analyze median data for a single country (WITH 3SD, Plus One Log)
analyze_country_median_3sd_plusone <- function(country, data_avg_filtered, data_ideal_filtered, data_reas_filtered, q_cols) {
cat("\n========== Analyzing Median Data (WITH 3SD, Log+1) for:", country, "==========\n")
# Filter data by country (using _Filtered data)
country_average_final <- data_avg_filtered %>% filter(Country_name == country)
country_ideal_final <- data_ideal_filtered %>% filter(Country_name == country)
country_reasonable_final <- data_reas_filtered %>% filter(Country_name == country)
n_avg <- nrow(country_average_final)
n_ideal <- nrow(country_ideal_final)
n_reas <- nrow(country_reasonable_final)
cat("Participants after ALL filters:", n_avg, n_ideal, n_reas, "\n")
if(n_avg < 1 || n_ideal < 1 || n_reas < 1) {
cat("Insufficient participants for", country, "\n"); return(NULL)
}
convert_to_numeric <- function(df, cols) { df %>% mutate(across(all_of(cols), ~ suppressWarnings(as.numeric(as.character(.))))) }
country_average_final <- convert_to_numeric(country_average_final, q_cols)
country_ideal_final <- convert_to_numeric(country_ideal_final, q_cols)
country_reasonable_final <- convert_to_numeric(country_reasonable_final, q_cols)
# Calculate medians
medians_avg <- if(n_avg > 0) sapply(country_average_final[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_ideal <- if(n_ideal > 0) sapply(country_ideal_final[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
medians_reas <- if(n_reas > 0) sapply(country_reasonable_final[, q_cols], median, na.rm = TRUE) else rep(NA, length(q_cols))
names(medians_avg) <- q_cols; names(medians_ideal) <- q_cols; names(medians_reas) <- q_cols
# Convert to log scale (Plus One)
log_medians_avg <- log_plus_one(medians_avg)
log_medians_ideal <- log_plus_one(medians_ideal)
log_medians_reas <- log_plus_one(medians_reas)
# Data frame for regression
data_for_aic <- data.frame(Reasonable = log_medians_reas, Average = log_medians_avg, Ideal = log_medians_ideal)
data_for_aic_clean <- data_for_aic %>% filter(is.finite(Reasonable) & is.finite(Average) & is.finite(Ideal))
# Check for sufficient data points for regression
if(nrow(data_for_aic_clean) < 5) {
cat("Too few valid data points for regression analysis in", country, "\n")
return(list(country=country, sample_sizes=list(after_all_filters=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_summary=NULL, best_model="Insufficient Data"))
}
# Run Regressions
model1 <- lm(Reasonable ~ Average, data = data_for_aic_clean)
aic1 <- AIC(model1); r2_1 <- summary(model1)$r.squared
model2 <- lm(Reasonable ~ Ideal, data = data_for_aic_clean)
aic2 <- AIC(model2); r2_2 <- summary(model2)$r.squared
model3 <- lm(Reasonable ~ Average + Ideal, data = data_for_aic_clean)
aic3 <- AIC(model3); r2_3 <- summary(model3)$r.squared
# Regression summary
reg_summary <- data.frame(Model=c("Avg", "Ideal", "Both"), AIC=c(aic1, aic2, aic3), R_squared=c(r2_1, r2_2, r2_3))
best_model_idx <- which.min(c(aic1, aic2, aic3))
best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
cat("Best model (WITH 3SD, Log+1):", best_model_name, "\n") # Keep this text output if desired
return(list(country=country, sample_sizes=list(after_all_filters=c(n_avg, n_ideal, n_reas)), medians=list(average=medians_avg, ideal=medians_ideal, reasonable=medians_reas), log_medians=list(average=log_medians_avg, ideal=log_medians_ideal, reasonable=log_medians_reas), regression_models=list(model1=model1, model2=model2, model3=model3), regression_summary=reg_summary, aic_values=c(Model1=aic1, Model2=aic2, Model3=aic3), best_model=best_model_name))
}
# Run analysis for each country
for(country in countries) {
country_results_median_3sd_plusone[[country]] <- analyze_country_median_3sd_plusone(country, Data_All_Average_Filtered, Data_All_Ideal_Filtered, Data_All_Reasonable_Filtered, question_cols)
}
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: USA ==========
## Participants after ALL filters: 62 54 59
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Colombia ==========
## Participants after ALL filters: 64 60 62
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Brazil ==========
## Participants after ALL filters: 30 36 33
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Germany ==========
## Participants after ALL filters: 51 50 58
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: India ==========
## Participants after ALL filters: 46 45 32
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Lithuania ==========
## Participants after ALL filters: 69 58 66
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Italy ==========
## Participants after ALL filters: 53 70 63
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Poland ==========
## Participants after ALL filters: 81 65 83
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Spain ==========
## Participants after ALL filters: 61 57 58
## Best model (WITH 3SD, Log+1): Hybrid
##
## ========== Analyzing Median Data (WITH 3SD, Log+1) for: Netherlands ==========
## Participants after ALL filters: 92 113 76
## Best model (WITH 3SD, Log+1): Hybrid
# Extract AIC summary
country_aic_summary_median_3sd_plusone <- data.frame( Country = character(), AIC_Average = numeric(), AIC_Ideal = numeric(), AIC_Hybrid = numeric(), R2_Average = numeric(), R2_Ideal = numeric(), R2_Hybrid = numeric(), Best_Model = character(), N_Avg = integer(), N_Ideal = integer(), N_Reas = integer(), stringsAsFactors = FALSE )
for(country in countries) {
res <- country_results_median_3sd_plusone[[country]]
if(!is.null(res)) {
country_aic_summary_median_3sd_plusone <- rbind(country_aic_summary_median_3sd_plusone, data.frame( Country = country, AIC_Average = ifelse(!is.null(res$aic_values), res$aic_values["Model1"], NA), AIC_Ideal = ifelse(!is.null(res$aic_values), res$aic_values["Model2"], NA), AIC_Hybrid = ifelse(!is.null(res$aic_values), res$aic_values["Model3"], NA), R2_Average = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[1], NA), R2_Ideal = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[2], NA), R2_Hybrid = ifelse(!is.null(res$regression_summary), res$regression_summary$R_squared[3], NA), Best_Model = res$best_model, N_Avg = res$sample_sizes$after_all_filters[1], N_Ideal = res$sample_sizes$after_all_filters[2], N_Reas = res$sample_sizes$after_all_filters[3], stringsAsFactors = FALSE))
}
}
# Display summary table - THIS WILL RENDER CORRECTLY IN KNITTED HTML
kable(country_aic_summary_median_3sd_plusone, caption = "AIC/R² by Country (Median, WITH 3SD, Log Plus One)", digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
column_spec(8, background = case_when( is.na(country_aic_summary_median_3sd_plusone$Best_Model) ~ "white", country_aic_summary_median_3sd_plusone$Best_Model == "Hybrid" ~ "#e6f7ff", country_aic_summary_median_3sd_plusone$Best_Model == "Average" ~ "#e6ffe6", country_aic_summary_median_3sd_plusone$Best_Model == "Ideal" ~ "#ffe6e6", TRUE ~ "#f2f2f2" ))
| Country | AIC_Average | AIC_Ideal | AIC_Hybrid | R2_Average | R2_Ideal | R2_Hybrid | Best_Model | N_Avg | N_Ideal | N_Reas |
|---|---|---|---|---|---|---|---|---|---|---|
| USA | 79.894 | 95.638 | 45.222 | 0.754 | 0.604 | 0.919 | Hybrid | 62 | 54 | 59 |
| Colombia | 47.162 | 57.224 | 20.019 | 0.902 | 0.867 | 0.959 | Hybrid | 64 | 60 | 62 |
| Brazil | 69.234 | 91.498 | 43.311 | 0.809 | 0.626 | 0.918 | Hybrid | 30 | 36 | 33 |
| Germany | 68.432 | 86.987 | 27.282 | 0.835 | 0.710 | 0.955 | Hybrid | 51 | 50 | 58 |
| India | 29.787 | -0.194 | -5.491 | 0.513 | 0.804 | 0.843 | Hybrid | 46 | 45 | 32 |
| Lithuania | 82.010 | 79.570 | 27.957 | 0.816 | 0.829 | 0.966 | Hybrid | 69 | 58 | 66 |
| Italy | 113.855 | 92.055 | 86.300 | 0.417 | 0.699 | 0.762 | Hybrid | 53 | 70 | 63 |
| Poland | 106.764 | 91.293 | 75.451 | 0.620 | 0.762 | 0.862 | Hybrid | 81 | 65 | 83 |
| Spain | 68.818 | 102.546 | 39.965 | 0.824 | 0.511 | 0.931 | Hybrid | 61 | 57 | 58 |
| Netherlands | 72.615 | 73.724 | 11.639 | 0.826 | 0.820 | 0.974 | Hybrid | 92 | 113 | 76 |
# Create storage
country_intermediacy_results_median_no3sd <- list()
# Function to analyze median intermediacy for a single country (NO 3SD)
analyze_country_intermediacy_median_no3sd <- function(country, country_results_list) {
cat("\n========== Analyzing Median Intermediacy (NO 3SD) for:", country, "==========\n")
# Use the median data from Analysis 7 NO 3SD
if(is.null(country_results_list[[country]]) || is.null(country_results_list[[country]]$medians)) {
cat("No valid median data from Analysis 7 for", country, "\n"); return(NULL)
}
medians_avg <- country_results_list[[country]]$medians$average
medians_ideal <- country_results_list[[country]]$medians$ideal
medians_reas <- country_results_list[[country]]$medians$reasonable
if(all(is.na(medians_avg)) || all(is.na(medians_ideal)) || all(is.na(medians_reas))){
cat("Median vectors contain only NAs for", country, "\n"); return(NULL)
}
# --- Calculate vectors, counts, and p-values ---
# Assumes is_ideal_side_median, is_average_side_median, is_both_sides_median are defined
ideal_vec <- mapply(is_ideal_side_median, medians_avg, medians_ideal, medians_reas)
ideal_count <- sum(ideal_vec, na.rm=TRUE); ideal_n <- sum(!is.na(ideal_vec))
ideal_p <- NA; ideal_test <- NULL
if(ideal_n > 0){ ideal_test <- tryCatch(binom.test(ideal_count, ideal_n, p=0.5, alt="greater"), error=function(e) NULL); if(!is.null(ideal_test)) ideal_p <- ideal_test$p.value }
avg_vec <- mapply(is_average_side_median, medians_avg, medians_ideal, medians_reas)
avg_count <- sum(avg_vec, na.rm=TRUE); avg_n <- sum(!is.na(avg_vec))
avg_p <- NA; avg_test <- NULL
if(avg_n > 0){ avg_test <- tryCatch(binom.test(avg_count, avg_n, p=0.5, alt="greater"), error=function(e) NULL); if(!is.null(avg_test)) avg_p <- avg_test$p.value }
both_vec <- mapply(is_both_sides_median, medians_avg, medians_ideal, medians_reas)
both_count <- sum(both_vec, na.rm=TRUE); both_n <- sum(!is.na(both_vec))
both_p <- NA; both_test <- NULL
if(both_n > 0){ both_test <- tryCatch(binom.test(both_count, both_n, p=1/3, alt="two.sided"), error=function(e) NULL); if(!is.null(both_test)) both_p <- both_test$p.value }
# Create summary table
summary_df <- data.frame(
Test = c("On Ideal Side", "On Average Side", "Between"),
Count = c(ideal_count, avg_count, both_count),
N = c(ideal_n, avg_n, both_n),
Prop = c(ideal_count/ideal_n, avg_count/avg_n, both_count/both_n),
p_value_raw = c(ideal_p, avg_p, both_p) # Store raw p-value
)
summary_df$Prop <- ifelse(is.nan(summary_df$Prop), NA, summary_df$Prop)
# Format p-value HERE using the defined helper function
summary_df$p_value_formatted <- sapply(summary_df$p_value_raw, format_pvalue)
# Return results including raw p-values
return(list(country=country,
ideal_side=list(vector=ideal_vec, count=ideal_count, n=ideal_n, prop=ideal_count/ideal_n, test=ideal_test),
average_side=list(vector=avg_vec, count=avg_count, n=avg_n, prop=avg_count/avg_n, test=avg_test),
both_sides=list(vector=both_vec, count=both_count, n=both_n, prop=both_count/both_n, test=both_test),
summary_df=summary_df)) # Return the df with raw and formatted p-values
}
# Run analysis for each country (using one of the NO 3SD results lists, e.g., _plusone)
analysis_7_results_no3sd_source <- country_results_median_no3sd_plusone # Choose source
for(country in countries) {
if (exists("analysis_7_results_no3sd_source") && !is.null(analysis_7_results_no3sd_source[[country]])) {
country_intermediacy_results_median_no3sd[[country]] <- analyze_country_intermediacy_median_no3sd(country, analysis_7_results_no3sd_source)
} else {
cat("\nSkipping intermediacy (NO 3SD) for", country, "due to missing results from Analysis 7.\n")
}
}
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: USA ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Colombia ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Brazil ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Germany ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: India ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Lithuania ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Italy ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Poland ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Spain ==========
##
## ========== Analyzing Median Intermediacy (NO 3SD) for: Netherlands ==========
# Create summary table for median intermediacy across countries (NO 3SD)
intermediacy_by_country_median_no3sd <- data.frame(Country=character(), Ideal_Prop=numeric(), Ideal_P=numeric(), Ideal_N=integer(), Avg_Prop=numeric(), Avg_P=numeric(), Avg_N=integer(), Between_Prop=numeric(), Between_P=numeric(), Between_N=integer(), stringsAsFactors=FALSE)
for(country in countries) {
res <- country_intermediacy_results_median_no3sd[[country]]
if(!is.null(res) && !is.null(res$summary_df)) { # Check if summary_df exists
ideal_p_val <- res$summary_df$p_value_raw[1]
avg_p_val <- res$summary_df$p_value_raw[2]
between_p_val <- res$summary_df$p_value_raw[3]
intermediacy_by_country_median_no3sd <- rbind(intermediacy_by_country_median_no3sd, data.frame(Country=country,
Ideal_Prop=res$ideal_side$prop, Ideal_P=ideal_p_val, Ideal_N=res$ideal_side$n,
Avg_Prop=res$average_side$prop, Avg_P=avg_p_val, Avg_N=res$average_side$n,
Between_Prop=res$both_sides$prop, Between_P=between_p_val, Between_N=res$both_sides$n,
stringsAsFactors=FALSE))
} else {
# Add row with NAs if results for country are NULL or incomplete
intermediacy_by_country_median_no3sd <- rbind(intermediacy_by_country_median_no3sd, data.frame(Country=country, Ideal_Prop=NA, Ideal_P=NA, Ideal_N=NA, Avg_Prop=NA, Avg_P=NA, Avg_N=NA, Between_Prop=NA, Between_P=NA, Between_N=NA, stringsAsFactors=FALSE))
}
}
# Format and display
# Handle potential NAs introduced before formatting
intermediacy_by_country_median_no3sd <- intermediacy_by_country_median_no3sd %>%
mutate(
Ideal_Prop_Pct = ifelse(is.na(Ideal_Prop), "NA", scales::percent(Ideal_Prop, accuracy = 0.1)),
Avg_Prop_Pct = ifelse(is.na(Avg_Prop), "NA", scales::percent(Avg_Prop, accuracy = 0.1)),
Between_Prop_Pct = ifelse(is.na(Between_Prop), "NA", scales::percent(Between_Prop, accuracy = 0.1)),
Ideal_Sig = ifelse(!is.na(Ideal_P) & Ideal_P < 0.05, "*", ""),
Avg_Sig = ifelse(!is.na(Avg_P) & Avg_P < 0.05, "*", ""),
Between_Sig = ifelse(!is.na(Between_P) & Between_P < 0.05, "*", "")
)
intermediacy_by_country_median_no3sd <- intermediacy_by_country_median_no3sd %>%
arrange(desc(Between_Prop)) # Arrange using the numeric column
# Corrected select statement (replaces the old select %>% rename)
display_intermediacy_no3sd <- intermediacy_by_country_median_no3sd %>%
select(Country,
`Ideal Side (%)` = Ideal_Prop_Pct, Ideal_Sig = Ideal_Sig,
`Average Side (%)` = Avg_Prop_Pct, Average_Sig = Avg_Sig,
`Between (%)` = Between_Prop_Pct, Between_Sig = Between_Sig
)
kable(display_intermediacy_no3sd, caption = "Intermediacy Analysis by Country (Median, NO 3SD Removal)", align = 'lcr') %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F) %>%
add_header_above(c(" " = 1, "Ideal Side*" = 2, "Average Side*" = 2, "Between*" = 2)) %>%
column_spec(1, bold = T)
| Country | Ideal Side (%) | Ideal_Sig | Average Side (%) | Average_Sig | Between (%) | Between_Sig |
|---|---|---|---|---|---|---|
| Lithuania | 81.8% |
|
42.4% | 100.0% |
|
|
| Germany | 84.8% |
|
33.3% | 97.0% |
|
|
| Poland | 72.7% |
|
42.4% | 97.0% |
|
|
| USA | 90.9% |
|
42.4% | 93.9% |
|
|
| Colombia | 81.8% |
|
36.4% | 93.9% |
|
|
| Spain | 72.7% |
|
51.5% | 93.9% |
|
|
| Italy | 72.7% |
|
33.3% | 90.9% |
|
|
| Netherlands | 81.8% |
|
39.4% | 90.9% |
|
|
| Brazil | 75.8% |
|
33.3% | 84.8% |
|
|
| India | 81.8% |
|
39.4% | 30.3% |
# Store results
analysis_8_results_no3sd <- list(
country_results = country_intermediacy_results_median_no3sd,
summary = intermediacy_by_country_median_no3sd # Contains raw P values and formatted props
)
# Analysis 8 [WITH 3 SD Removed]
# Create storage
country_intermediacy_results_median_3sd <- list()
# Function to analyze median intermediacy for a single country (WITH 3SD)
analyze_country_intermediacy_median_3sd <- function(country, country_results_list) {
cat("\n========== Analyzing Median Intermediacy (WITH 3SD) for:", country, "==========\n")
# Use the median data from Analysis 7 WITH 3SD (either _smallval or _plusone is fine)
if(is.null(country_results_list[[country]]) || is.null(country_results_list[[country]]$medians)) {
cat("No valid median data from Analysis 7 (WITH 3SD) for", country, "\n"); return(NULL)
}
# Extract the raw median values
medians_avg <- country_results_list[[country]]$medians$average
medians_ideal <- country_results_list[[country]]$medians$ideal
medians_reas <- country_results_list[[country]]$medians$reasonable
if(all(is.na(medians_avg)) || all(is.na(medians_ideal)) || all(is.na(medians_reas))){
cat("Median vectors contain only NAs for", country, "\n"); return(NULL)
}
# Ensure intermediacy functions are defined
# is_ideal_side_median, is_average_side_median, is_both_sides_median
# --- Calculate vectors, counts, and p-values ---
ideal_vec <- mapply(is_ideal_side_median, medians_avg, medians_ideal, medians_reas)
ideal_count <- sum(ideal_vec, na.rm=TRUE); ideal_n <- sum(!is.na(ideal_vec))
ideal_p <- NA; ideal_test <- NULL
if(ideal_n > 0){ ideal_test <- tryCatch(binom.test(ideal_count, ideal_n, p=0.5, alt="greater"), error=function(e) NULL); if(!is.null(ideal_test)) ideal_p <- ideal_test$p.value }
avg_vec <- mapply(is_average_side_median, medians_avg, medians_ideal, medians_reas)
avg_count <- sum(avg_vec, na.rm=TRUE); avg_n <- sum(!is.na(avg_vec))
avg_p <- NA; avg_test <- NULL
if(avg_n > 0){ avg_test <- tryCatch(binom.test(avg_count, avg_n, p=0.5, alt="greater"), error=function(e) NULL); if(!is.null(avg_test)) avg_p <- avg_test$p.value }
both_vec <- mapply(is_both_sides_median, medians_avg, medians_ideal, medians_reas)
both_count <- sum(both_vec, na.rm=TRUE); both_n <- sum(!is.na(both_vec))
both_p <- NA; both_test <- NULL
if(both_n > 0){ both_test <- tryCatch(binom.test(both_count, both_n, p=1/3, alt="two.sided"), error=function(e) NULL); if(!is.null(both_test)) both_p <- both_test$p.value }
# Create summary table
summary_df <- data.frame(
Test = c("On Ideal Side", "On Average Side", "Between"),
Count = c(ideal_count, avg_count, both_count),
N = c(ideal_n, avg_n, both_n),
Prop = c(ideal_count/ideal_n, avg_count/avg_n, both_count/both_n),
p_value = c(ideal_p, avg_p, both_p)
)
# Handle potential NaN from 0/0
summary_df$Prop <- ifelse(is.nan(summary_df$Prop), NA, summary_df$Prop)
summary_df$Proportion <- ifelse(is.na(summary_df$Prop), "NA", scales::percent(summary_df$Prop, accuracy=0.1))
summary_df$p_value <- sapply(summary_df$p_value, format_pvalue) # Ensure format_pvalue handles NA
## --- REMOVED THIS LINE to prevent raw HTML output during loop ---
# cat("\nMedian Intermediacy Analysis (WITH 3SD Removed) for", country, ":\n")
# print(kable(summary_df[,c("Test", "Count", "N", "Proportion", "p_value")]) %>% kable_styling(font_size=10))
# Return results
return(list(country=country, ideal_side=list(vector=ideal_vec, count=ideal_count, n=ideal_n, prop=ideal_count/ideal_n, test=ideal_test), average_side=list(vector=avg_vec, count=avg_count, n=avg_n, prop=avg_count/avg_n, test=avg_test), both_sides=list(vector=both_vec, count=both_count, n=both_n, prop=both_count/both_n, test=both_test), summary=summary_df))
}
# Run analysis for each country (using one of the WITH 3SD results lists)
for(country in countries) {
# Ensure the source list exists and has data for the country
if (exists("country_results_median_3sd_plusone") && !is.null(country_results_median_3sd_plusone[[country]])) {
country_intermediacy_results_median_3sd[[country]] <- analyze_country_intermediacy_median_3sd(country, country_results_median_3sd_plusone) # Using _plusone results arbitrarily
} else {
cat("\nSkipping intermediacy for", country, "due to missing results from Analysis 7 (WITH 3SD).\n")
}
}
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: USA ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Colombia ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Brazil ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Germany ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: India ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Lithuania ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Italy ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Poland ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Spain ==========
##
## ========== Analyzing Median Intermediacy (WITH 3SD) for: Netherlands ==========
# Create summary table for median intermediacy across countries (WITH 3SD)
intermediacy_by_country_median_3sd <- data.frame(Country=character(), Ideal_Prop=numeric(), Ideal_P=numeric(), Ideal_N=integer(), Avg_Prop=numeric(), Avg_P=numeric(), Avg_N=integer(), Between_Prop=numeric(), Between_P=numeric(), Between_N=integer(), stringsAsFactors=FALSE)
for(country in countries) {
res <- country_intermediacy_results_median_3sd[[country]]
if(!is.null(res)) {
# Extract p-values safely
ideal_p_val <- if(!is.null(res$ideal_side$test) && !is.null(res$ideal_side$test$p.value)) res$ideal_side$test$p.value else NA
avg_p_val <- if(!is.null(res$average_side$test) && !is.null(res$average_side$test$p.value)) res$average_side$test$p.value else NA
between_p_val <- if(!is.null(res$both_sides$test) && !is.null(res$both_sides$test$p.value)) res$both_sides$test$p.value else NA
intermediacy_by_country_median_3sd <- rbind(intermediacy_by_country_median_3sd, data.frame(Country=country,
Ideal_Prop=res$ideal_side$prop, Ideal_P=ideal_p_val, Ideal_N=res$ideal_side$n,
Avg_Prop=res$average_side$prop, Avg_P=avg_p_val, Avg_N=res$average_side$n,
Between_Prop=res$both_sides$prop, Between_P=between_p_val, Between_N=res$both_sides$n,
stringsAsFactors=FALSE))
} else {
intermediacy_by_country_median_3sd <- rbind(intermediacy_by_country_median_3sd, data.frame(Country=country, Ideal_Prop=NA, Ideal_P=NA, Ideal_N=0, Avg_Prop=NA, Avg_P=NA, Avg_N=0, Between_Prop=NA, Between_P=NA, Between_N=0, stringsAsFactors=FALSE))
}
}
# Format and display - THIS WILL RENDER CORRECTLY IN KNITTED HTML
# Need to handle NAs before formatting
intermediacy_by_country_median_3sd <- intermediacy_by_country_median_3sd %>%
mutate(
Ideal_Prop_Pct = ifelse(is.na(Ideal_Prop), NA, scales::percent(Ideal_Prop, accuracy = 0.1)),
Avg_Prop_Pct = ifelse(is.na(Avg_Prop), NA, scales::percent(Avg_Prop, accuracy = 0.1)),
Between_Prop_Pct = ifelse(is.na(Between_Prop), NA, scales::percent(Between_Prop, accuracy = 0.1)),
Ideal_Sig = ifelse(!is.na(Ideal_P) & Ideal_P < 0.05, "*", ""),
Avg_Sig = ifelse(!is.na(Avg_P) & Avg_P < 0.05, "*", ""),
Between_Sig = ifelse(!is.na(Between_P) & Between_P < 0.05, "*", "")
)
# Arrange based on numeric proportion before formatting
intermediacy_by_country_median_3sd <- intermediacy_by_country_median_3sd %>%
arrange(desc(Between_Prop)) # Arrange using the numeric column
# Corrected select statement
display_intermediacy_3sd <- intermediacy_by_country_median_3sd %>%
select(Country,
`Ideal Side (%)` = Ideal_Prop_Pct, Ideal_Sig = Ideal_Sig,
`Average Side (%)` = Avg_Prop_Pct, Average_Sig = Avg_Sig,
`Between (%)` = Between_Prop_Pct, Between_Sig = Between_Sig
)
kable(display_intermediacy_3sd, caption = "Intermediacy Analysis by Country (Median, WITH 3SD Removal)") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F) %>%
add_header_above(c(" " = 1, "Ideal Side (%)*" = 2, "Average Side (%)*" = 2, "Between (%)*" = 2)) %>%
column_spec(1, bold = T)
| Country | Ideal Side (%) | Ideal_Sig | Average Side (%) | Average_Sig | Between (%) | Between_Sig |
|---|---|---|---|---|---|---|
| Germany | 84.8% |
|
39.4% | 100.0% |
|
|
| Lithuania | 84.8% |
|
45.5% | 100.0% |
|
|
| Poland | 63.6% | 45.5% | 100.0% |
|
||
| USA | 84.8% |
|
42.4% | 97.0% |
|
|
| Spain | 69.7% |
|
45.5% | 90.9% |
|
|
| Colombia | 75.8% |
|
42.4% | 87.9% |
|
|
| Netherlands | 75.8% |
|
42.4% | 87.9% |
|
|
| Brazil | 75.8% |
|
36.4% | 81.8% |
|
|
| Italy | 75.8% |
|
33.3% | 81.8% |
|
|
| India | 87.9% |
|
39.4% | 3.0% |
|
# Store results
analysis_8_results_3sd <- list(
country_results = country_intermediacy_results_median_3sd,
summary = intermediacy_by_country_median_3sd # Contains raw P values and formatted proportions
)
# Create a mapping for question numbers to descriptive labels
question_labels <- c(
"q1" = "Hrs TV / day", "q2" = "Sugary drinks / wk", "q3" = "Exercise hrs / wk",
"q4" = "Calories / day", "q5" = "Vegetable servings / mo", "q6" = "Lies told / wk",
"q7" = "Mins doctor late", "q8" = "Books read / yr", "q9" = "Romantic partners / life",
"q10" = "Int'l conflicts / decade", "q11" = "Dollars cheated on taxes", "q12" = "% Cheating students",
"q13" = "Phone checks / day", "q14" = "Mins phone wait time", "q15" = "Calls to parent / mo",
"q16" = "House cleans / mo", "q17" = "Computer crashes / mo", "q18" = "% School dropouts",
"q20" = "% Kids bullied", "q21" = "College bro drinks / wknd", "q22" = "Days to accept contract",
"q23" = "Wks to return product", "q24" = "Hrs to reflect on offer", "q25" = "Extra costs ($10k build)",
"q26" = "Wks construction delay", "q27" = "Loud events near homes / yr", "q28" = "% Car profits on safety",
"q29" = "% Medical details desired", "q30" = "Wks wait for criminal trial", "q31" = "Hourly atty fee (charity)",
"q32" = "Hrs landlord entry notice", "q33" = "Loan interest rate (%)", "q34" = "Polluter recidivism (%)"
)
# Prepare the data for visualization
prepare_figure1_data <- function() {
# Extract country information
countries <- unique(Data_All$Country_name)
# Initialize empty dataframe
figure1_data <- data.frame()
# Add Poland expert category if it exists
if("Poland_Expert" %in% countries) {
countries <- countries[countries != "Poland_Expert"]
countries <- c(countries, "Poland_Expert")
}
# Process each country
for(country in countries) {
country_name <- country
if(country == "Poland_Expert") {
country_filter <- "Poland"
country_name <- "Poland_Expert"
} else {
country_filter <- country
}
# Filter by country and condition
if(country == "Poland_Expert") {
# Special handling for Poland expert data if needed
country_avg <- Data_All_Average_Pass1 %>%
filter(Country_name == country_filter, Expert == TRUE)
country_ideal <- Data_All_Ideal_Pass1 %>%
filter(Country_name == country_filter, Expert == TRUE)
country_reasonable <- Data_All_Reasonable_Pass1 %>%
filter(Country_name == country_filter, Expert == TRUE)
} else {
country_avg <- Data_All_Average_Pass1 %>%
filter(Country_name == country_filter)
country_ideal <- Data_All_Ideal_Pass1 %>%
filter(Country_name == country_filter)
country_reasonable <- Data_All_Reasonable_Pass1 %>%
filter(Country_name == country_filter)
}
# Calculate medians for each question by condition
for(q in question_cols) {
if(q %in% colnames(country_avg) && q %in% colnames(country_ideal) && q %in% colnames(country_reasonable)) {
# Calculate medians
median_avg <- median(as.numeric(country_avg[[q]]), na.rm = TRUE)
median_ideal <- median(as.numeric(country_ideal[[q]]), na.rm = TRUE)
median_reasonable <- median(as.numeric(country_reasonable[[q]]), na.rm = TRUE)
# Add to dataframe
figure1_data <- rbind(figure1_data, data.frame(
Country = country_name,
Item = q,
Median_average = median_avg,
Median_ideal = median_ideal,
Median_reasonable = median_reasonable
))
}
}
}
return(figure1_data)
}
# Generate the data
figure1_data <- prepare_figure1_data()
# Process the data to match the paper's visualization
processed_data <- figure1_data %>%
mutate(
Item_label = question_labels[Item],
Color = ifelse(Median_reasonable >= Median_average & Median_reasonable <= Median_ideal |
Median_reasonable <= Median_average & Median_reasonable >= Median_ideal,
"navy", "firebrick"),
Shape = case_when(
Median_ideal < Median_average ~ 'Less', # Downward triangle
Median_ideal > Median_average ~ 'More', # Upward triangle
Median_ideal == Median_average ~ 'Equal' # Circle
),
Ratio = log2(Median_ideal/Median_average)
)
# Calculate the ratio for ordering items
item_ratios <- processed_data %>%
group_by(Item) %>%
summarize(Avg_ratio = mean(Ratio, na.rm = TRUE))
# Join with the processed data
processed_data <- processed_data %>%
left_join(item_ratios, by = "Item")
# Define country abbreviation labels
CountryLabels = c(
'Brazil' = 'BR',
'Colombia' = 'CO',
'Germany' = 'DE',
'India' = 'IN',
'Italy' = 'IT',
'Lithuania' = 'LT',
'Netherlands' = 'NL',
'Poland' = 'PL',
'Poland_Expert' = 'PL*',
'Spain' = 'ES',
'USA' = 'US'
)
# Update country names to use abbreviations for plotting
processed_data$Country_label <- CountryLabels[processed_data$Country]
# Create the figure
figure1 <- ggplot(processed_data,
aes(x = Country, y = reorder(Item_label, Avg_ratio),
fill = Color, color = Color)) +
geom_point(aes(shape = Shape), size = 5, alpha = 0.9, stroke = 1) +
geom_text(aes(label = round(Median_reasonable)),
size = 2.5, color = "white") +
scale_fill_identity() +
scale_color_identity() +
scale_shape_manual(values = c('Equal' = 21, 'Less' = 25, 'More' = 24)) +
scale_x_discrete(labels = CountryLabels, position = "top") +
theme_minimal() +
theme(
axis.title = element_blank(),
legend.position = "none",
text = element_text(color = "black"),
axis.text.x = element_text(face = "bold", color = "black", size = 10),
axis.text.y = element_text(size = 9),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
# Display the figure
print(figure1)
# Save the figure
ggsave("Figure1_Reasonableness.jpg", figure1, dpi = 300, width = 16, height = 25, units = "cm")
# Create a mapping for question numbers to descriptive labels
question_labels <- c(
"q1" = "Hrs TV / day", "q2" = "Sugary drinks / wk", "q3" = "Exercise hrs / wk",
"q4" = "Calories / day", "q5" = "Vegetable servings / mo", "q6" = "Lies told / wk",
"q7" = "Mins doctor late", "q8" = "Books read / yr", "q9" = "Romantic partners / life",
"q10" = "Int'l conflicts / decade", "q11" = "Dollars cheated on taxes", "q12" = "% Cheating students",
"q13" = "Phone checks / day", "q14" = "Mins phone wait time", "q15" = "Calls to parent / mo",
"q16" = "House cleans / mo", "q17" = "Computer crashes / mo", "q18" = "% School dropouts",
"q20" = "% Kids bullied", "q21" = "College bro drinks / wknd", "q22" = "Days to accept contract",
"q23" = "Wks to return product", "q24" = "Hrs to reflect on offer", "q25" = "Extra costs ($10k build)",
"q26" = "Wks construction delay", "q27" = "Loud events near homes / yr", "q28" = "% Car profits on safety",
"q29" = "% Medical details desired", "q30" = "Wks wait for criminal trial", "q31" = "Hourly atty fee (charity)",
"q32" = "Hrs landlord entry notice", "q33" = "Loan interest rate (%)", "q34" = "Polluter recidivism (%)"
)
# Prepare the data for visualization
prepare_figure1_data <- function() {
# Extract country information
countries <- unique(Data_All$Country_name)
# Initialize empty dataframe
figure1_data <- data.frame()
# Add Poland expert category if it exists
if("Poland_Expert" %in% countries) {
countries <- countries[countries != "Poland_Expert"]
countries <- c(countries, "Poland_Expert")
}
# Process each country
for(country in countries) {
country_name <- country
if(country == "Poland_Expert") {
country_filter <- "Poland"
country_name <- "Poland_Expert"
} else {
country_filter <- country
}
# Filter by country and condition
if(country == "Poland_Expert") {
# Special handling for Poland expert data if needed
country_avg <- Data_All_Average_Filtered %>%
filter(Country_name == country_filter, Expert == TRUE)
country_ideal <- Data_All_Ideal_Filtered %>%
filter(Country_name == country_filter, Expert == TRUE)
country_reasonable <- Data_All_Reasonable_Filtered %>%
filter(Country_name == country_filter, Expert == TRUE)
} else {
country_avg <- Data_All_Average_Filtered %>%
filter(Country_name == country_filter)
country_ideal <- Data_All_Ideal_Filtered %>%
filter(Country_name == country_filter)
country_reasonable <- Data_All_Reasonable_Filtered %>%
filter(Country_name == country_filter)
}
# Calculate medians for each question by condition
for(q in question_cols) {
if(q %in% colnames(country_avg) && q %in% colnames(country_ideal) && q %in% colnames(country_reasonable)) {
# Calculate medians
median_avg <- median(as.numeric(country_avg[[q]]), na.rm = TRUE)
median_ideal <- median(as.numeric(country_ideal[[q]]), na.rm = TRUE)
median_reasonable <- median(as.numeric(country_reasonable[[q]]), na.rm = TRUE)
# Add to dataframe
figure1_data <- rbind(figure1_data, data.frame(
Country = country_name,
Item = q,
Median_average = median_avg,
Median_ideal = median_ideal,
Median_reasonable = median_reasonable
))
}
}
}
return(figure1_data)
}
# Generate the data
figure1_data <- prepare_figure1_data()
# Process the data to match the paper's visualization
processed_data <- figure1_data %>%
mutate(
Item_label = question_labels[Item],
Color = ifelse(Median_reasonable >= Median_average & Median_reasonable <= Median_ideal |
Median_reasonable <= Median_average & Median_reasonable >= Median_ideal,
"navy", "firebrick"),
Shape = case_when(
Median_ideal < Median_average ~ 'Less', # Downward triangle
Median_ideal > Median_average ~ 'More', # Upward triangle
Median_ideal == Median_average ~ 'Equal' # Circle
),
Ratio = log2(Median_ideal/Median_average)
)
# Calculate the ratio for ordering items
item_ratios <- processed_data %>%
group_by(Item) %>%
summarize(Avg_ratio = mean(Ratio, na.rm = TRUE))
# Join with the processed data
processed_data <- processed_data %>%
left_join(item_ratios, by = "Item")
# Define country abbreviation labels
CountryLabels = c(
'Brazil' = 'BR',
'Colombia' = 'CO',
'Germany' = 'DE',
'India' = 'IN',
'Italy' = 'IT',
'Lithuania' = 'LT',
'Netherlands' = 'NL',
'Poland' = 'PL',
'Poland_Expert' = 'PL*',
'Spain' = 'ES',
'USA' = 'US'
)
# Update country names to use abbreviations for plotting
processed_data$Country_label <- CountryLabels[processed_data$Country]
# Create the figure
figure1 <- ggplot(processed_data,
aes(x = Country, y = reorder(Item_label, Avg_ratio),
fill = Color, color = Color)) +
geom_point(aes(shape = Shape), size = 5, alpha = 0.9, stroke = 1) +
geom_text(aes(label = round(Median_reasonable)),
size = 2.5, color = "white") +
scale_fill_identity() +
scale_color_identity() +
scale_shape_manual(values = c('Equal' = 21, 'Less' = 25, 'More' = 24)) +
scale_x_discrete(labels = CountryLabels, position = "top") +
theme_minimal() +
theme(
axis.title = element_blank(),
legend.position = "none",
text = element_text(color = "black"),
axis.text.x = element_text(face = "bold", color = "black", size = 10),
axis.text.y = element_text(size = 9),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)
# Display the figure
print(figure1)
# Save the figure
ggsave("Figure1_Reasonableness.jpg", figure1, dpi = 300, width = 16, height = 25, units = "cm")